; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86


; Inverts a 16-bit mask that arrives and leaves as a plain i16: the argument is
; reinterpreted as <16 x i1>, every lane is xor'ed with true, and the result is
; cast back. The expected code in every run is a scalar not on a general
; purpose register; no mask register is involved.
define i16 @mask16(i16 %x) {
; CHECK-LABEL: mask16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    notl %eax
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: mask16:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    notl %eax
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %m0 = bitcast i16 %x to <16 x i1>
  ; xor with an all-true vector is the IR spelling of a lane-wise not.
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  ret i16 %ret
}

; Same 16-bit mask inversion as mask16, but the inverted value is zero-extended
; to i32 before being returned. The 64-bit runs expect notl followed by movzwl;
; the 32-bit run expects a zero-extending load followed by xorl with 0xFFFF.
define i32 @mask16_zext(i16 %x) {
; CHECK-LABEL: mask16_zext:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    notl %edi
; CHECK-NEXT:    movzwl %di, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: mask16_zext:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorl $65535, %eax ## imm = 0xFFFF
; X86-NEXT:    retl
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %m2 = bitcast <16 x i1> %m1 to i16
  %ret = zext i16 %m2 to i32
  ret i32 %ret
}

; 8-bit variant of mask16: the i8 argument is viewed as <8 x i1>, all lanes are
; inverted, and the result is returned as i8. Every run expects a scalar notb.
define i8 @mask8(i8 %x) {
; CHECK-LABEL: mask8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    notb %al
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: mask8:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    notb %al
; X86-NEXT:    retl
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  ret i8 %ret
}

; Inverts an 8-bit mask and zero-extends the inverted byte to i32. All runs
; expect notb on the byte followed by movzbl into the return register.
define i32 @mask8_zext(i8 %x) {
; CHECK-LABEL: mask8_zext:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    notb %dil
; CHECK-NEXT:    movzbl %dil, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: mask8_zext:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    notb %al
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    retl
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %m2 = bitcast <8 x i1> %m1 to i8
  %ret = zext i8 %m2 to i32
  ret i32 %ret
}

; In-place inversion of a 16-bit mask held in memory: load i16 from %ptr, invert
; it as <16 x i1>, store it back to the same address. Unlike the register-only
; mask16 test, every run expects the mask-register sequence kmovw load, knotw,
; kmovw store.
define void @mask16_mem(i16* %ptr) {
; CHECK-LABEL: mask16_mem:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    kmovw (%rdi), %k0
; CHECK-NEXT:    knotw %k0, %k0
; CHECK-NEXT:    kmovw %k0, (%rdi)
; CHECK-NEXT:    retq
;
; X86-LABEL: mask16_mem:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw (%eax), %k0
; X86-NEXT:    knotw %k0, %k0
; X86-NEXT:    kmovw %k0, (%eax)
; X86-NEXT:    retl
  %x = load i16, i16* %ptr, align 4
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = xor <16 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <16 x i1> %m1 to i16
  store i16 %ret, i16* %ptr, align 4
  ret void
}

; In-place inversion of an 8-bit mask held in memory. The expected code depends
; on the feature set of the run: the runs that enable avx512dq expect the
; byte-wide mask instructions (kmovb / knotb / kmovb), while the runs without
; avx512dq (avx512f only, avx512bw only) expect a single scalar notb on memory.
define void @mask8_mem(i8* %ptr) {
; KNL-LABEL: mask8_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    notb (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: mask8_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    knotb %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: mask8_mem:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    notb (%rdi)
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: mask8_mem:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovb (%rdi), %k0
; AVX512DQ-NEXT:    knotb %k0, %k0
; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: mask8_mem:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovb (%eax), %k0
; X86-NEXT:    knotb %k0, %k0
; X86-NEXT:    kmovb %k0, (%eax)
; X86-NEXT:    retl
  %x = load i8, i8* %ptr, align 4
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
  %ret = bitcast <8 x i1> %m1 to i8
  store i8 %ret, i8* %ptr, align 4
  ret void
}

; Combines two 16-bit masks passed as i16 scalars: computes
; (x and y) or (x xor y) on their <16 x i1> views and returns the result as
; i16. Every run expects plain scalar andl / xorl / orl with no mask registers.
define i16 @mand16(i16 %x, i16 %y) {
; CHECK-LABEL: mand16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    movl %edi, %ecx
; CHECK-NEXT:    andl %esi, %ecx
; CHECK-NEXT:    xorl %esi, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: mand16:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    andl %ecx, %edx
; X86-NEXT:    xorl %ecx, %eax
; X86-NEXT:    orl %edx, %eax
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %ma = bitcast i16 %x to <16 x i1>
  %mb = bitcast i16 %y to <16 x i1>
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}

; Same and / xor / or combination as mand16, but the two operands are loaded
; directly as <16 x i1> from memory. Here every run expects the mask-register
; forms (kmovw loads, kandw, kxorw, korw). The final move into eax is kmovd in
; the runs that enable avx512bw and kmovw in the runs that do not.
define i16 @mand16_mem(<16 x i1>* %x, <16 x i1>* %y) {
; KNL-LABEL: mand16_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw (%rdi), %k0
; KNL-NEXT:    kmovw (%rsi), %k1
; KNL-NEXT:    kandw %k1, %k0, %k2
; KNL-NEXT:    kxorw %k1, %k0, %k0
; KNL-NEXT:    korw %k0, %k2, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: mand16_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovw (%rdi), %k0
; SKX-NEXT:    kmovw (%rsi), %k1
; SKX-NEXT:    kandw %k1, %k0, %k2
; SKX-NEXT:    kxorw %k1, %k0, %k0
; SKX-NEXT:    korw %k0, %k2, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: mand16_mem:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovw (%rdi), %k0
; AVX512BW-NEXT:    kmovw (%rsi), %k1
; AVX512BW-NEXT:    kandw %k1, %k0, %k2
; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
; AVX512BW-NEXT:    korw %k0, %k2, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: mand16_mem:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw (%rdi), %k0
; AVX512DQ-NEXT:    kmovw (%rsi), %k1
; AVX512DQ-NEXT:    kandw %k1, %k0, %k2
; AVX512DQ-NEXT:    kxorw %k1, %k0, %k0
; AVX512DQ-NEXT:    korw %k0, %k2, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: mand16_mem:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    kmovw (%ecx), %k0
; X86-NEXT:    kmovw (%eax), %k1
; X86-NEXT:    kandw %k1, %k0, %k2
; X86-NEXT:    kxorw %k1, %k0, %k0
; X86-NEXT:    korw %k0, %k2, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
  %ma = load <16 x i1>, <16 x i1>* %x
  %mb = load <16 x i1>, <16 x i1>* %y
  %mc = and <16 x i1> %ma, %mb
  %md = xor <16 x i1> %ma, %mb
  %me = or <16 x i1> %mc, %md
  %ret = bitcast <16 x i1> %me to i16
  ret i16 %ret
}

; Extracts the upper half of a 16-bit mask: lanes 8..15 of the <16 x i1> view of
; %v are selected with a shufflevector and returned as i8. The 64-bit runs
; expect a round trip through a mask register with kshiftrw $8; the 32-bit run
; expects just a single byte load from the stack argument.
define i8 @shuf_test1(i16 %v) nounwind {
; KNL-LABEL: shuf_test1:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kshiftrw $8, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: shuf_test1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k0
; SKX-NEXT:    kshiftrw $8, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: shuf_test1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k0
; AVX512BW-NEXT:    kshiftrw $8, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: shuf_test1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw %edi, %k0
; AVX512DQ-NEXT:    kshiftrw $8, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: shuf_test1:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    retl
   %v1 = bitcast i16 %v to <16 x i1>
   ; Indices 8..15 pick the high eight lanes of the 16-lane mask.
   %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   %mask1 = bitcast <8 x i1> %mask to i8
   ret i8 %mask1
}

; Zero-extends a single lane of a vector compare result to i32: an unsigned
; greater-than compare of two <16 x i32> vectors, lane 5 extracted, then zext.
; Every run expects vpcmpnleud into a mask register, kshiftrw $5 to bring lane
; 5 to bit 0, a move to eax and andl $1 to isolate the bit.
define i32 @zext_test1(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: zext_test1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT:    kshiftrw $5, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_test1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT:    kshiftrw $5, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: zext_test1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kshiftrw $5, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    andl $1, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: zext_test1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    andl $1, %eax
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: zext_test1:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; X86-NEXT:    kshiftrw $5, %k0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    andl $1, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i32
  ret i32 %res
}

; Same lane-5 extraction from an unsigned greater-than compare as zext_test1,
; but the extracted bit is zero-extended to i16. The expected instruction
; sequence matches zext_test1 apart from the sub-register kill annotation.
define i16 @zext_test2(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: zext_test2:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT:    kshiftrw $5, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    andl $1, %eax
; KNL-NEXT:    ## kill: def $ax killed $ax killed $eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_test2:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT:    kshiftrw $5, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    andl $1, %eax
; SKX-NEXT:    ## kill: def $ax killed $ax killed $eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: zext_test2:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kshiftrw $5, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    andl $1, %eax
; AVX512BW-NEXT:    ## kill: def $ax killed $ax killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: zext_test2:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    andl $1, %eax
; AVX512DQ-NEXT:    ## kill: def $ax killed $ax killed $eax
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: zext_test2:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; X86-NEXT:    kshiftrw $5, %k0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    andl $1, %eax
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i16
  ret i16 %res
}

; Same lane-5 extraction from an unsigned greater-than compare as zext_test1,
; but the extracted bit is zero-extended to i8. The bit is isolated with a
; byte-wide andb $1 on al instead of the 32-bit andl used by the wider variants.
define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) {
; KNL-LABEL: zext_test3:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; KNL-NEXT:    kshiftrw $5, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    andb $1, %al
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: zext_test3:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT:    kshiftrw $5, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    andb $1, %al
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: zext_test3:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kshiftrw $5, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    andb $1, %al
; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: zext_test3:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; AVX512DQ-NEXT:    kshiftrw $5, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    andb $1, %al
; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: zext_test3:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
; X86-NEXT:    kshiftrw $5, %k0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    andb $1, %al
; X86-NEXT:    ## kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %cmp_res = icmp ugt <16 x i32> %a, %b
  %cmp_res.i1 = extractelement <16 x i1> %cmp_res, i32 5
  %res = zext i1 %cmp_res.i1 to i8
  ret i8 %res
}

; Stores constant <8 x i1> masks and converts one of them to i8. An all-true
; mask is written through %R; a second mask with only lane 0 cleared is written
; to a stack slot, reloaded and bitcast to i8. The expected code stores the
; immediates -1 (to %R) and -2 (to the stack slot) as bytes and returns the
; immediate -2 directly rather than reloading it.
define i8 @conv1(<8 x i1>* %R) {
; CHECK-LABEL: conv1:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movb $-1, (%rdi)
; CHECK-NEXT:    movb $-2, -{{[0-9]+}}(%rsp)
; CHECK-NEXT:    movb $-2, %al
; CHECK-NEXT:    retq
;
; X86-LABEL: conv1:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    pushl %eax
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb $-1, (%eax)
; X86-NEXT:    movb $-2, {{[0-9]+}}(%esp)
; X86-NEXT:    movb $-2, %al
; X86-NEXT:    popl %ecx
; X86-NEXT:    retl
entry:
  store <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %R

  %maskPtr = alloca <8 x i1>
  ; Lane 0 is false and lanes 1..7 are true, i.e. the byte 0xFE (-2).
  store <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, <8 x i1>* %maskPtr
  %mask = load <8 x i1>, <8 x i1>* %maskPtr
  %mask_convert = bitcast <8 x i1> %mask to i8
  ret i8 %mask_convert
}

; Compares two <4 x i1> compare results with a signed icmp and sign-extends the
; outcome to <4 x i32>. On i1 lanes a true value is -1, so `sgt` holds only
; where the left operand is false and the right operand is true. The expected
; code reflects that: vpcmpgtq on (%x1, %y1) produces the write mask and a
; masked vpcmpleq on (%x, %y) produces the final mask. Runs without avx512vl
; widen the ymm operands to zmm (see the kill annotations).
define <4 x i32> @test4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64> %y1) {
; KNL-LABEL: test4:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
; KNL-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
; KNL-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1
; KNL-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 {%k1}
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test4:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpgtq %ymm3, %ymm2, %k1
; SKX-NEXT:    vpcmpleq %ymm1, %ymm0, %k0 {%k1}
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test4:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512BW-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1
; AVX512BW-NEXT:    vpcmpleq %zmm1, %zmm0, %k1 {%k1}
; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test4:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512DQ-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512DQ-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT:    vpcmpgtq %zmm3, %zmm2, %k1
; AVX512DQ-NEXT:    vpcmpleq %zmm1, %zmm0, %k0 {%k1}
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test4:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpgtq %ymm3, %ymm2, %k1
; X86-NEXT:    vpcmpleq %ymm1, %ymm0, %k0 {%k1}
; X86-NEXT:    vpmovm2d %k0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %x_gt_y = icmp sgt <4 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <4 x i64> %x1, %y1
  %res = icmp sgt <4 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <4 x i1>%res to <4 x i32>
  ret <4 x i32> %resse
}

; Compares two <2 x i1> compare results with a signed `slt` and sign-extends the
; outcome to <2 x i64>. On i1 lanes a true value is -1, so `slt` holds only
; where the left operand is true and the right operand is false. The expected
; code is vpcmpgtq on (%y, %x) as the write mask followed by a masked vpcmpleq
; on (%x1, %y1). Runs without avx512vl widen the xmm operands to zmm.
; Note that %x_gt_y is actually computed with `slt`; the value name is kept
; as-is because it does not affect the generated code.
define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1) {
; KNL-LABEL: test5:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $xmm3 killed $xmm3 def $zmm3
; KNL-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
; KNL-NEXT:    vpcmpleq %zmm3, %zmm2, %k1 {%k1}
; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test5:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
; SKX-NEXT:    vpcmpleq %xmm3, %xmm2, %k0 {%k1}
; SKX-NEXT:    vpmovm2q %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test5:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    ## kill: def $xmm3 killed $xmm3 def $zmm3
; AVX512BW-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512BW-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
; AVX512BW-NEXT:    vpcmpleq %zmm3, %zmm2, %k1 {%k1}
; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test5:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    ## kill: def $xmm3 killed $xmm3 def $zmm3
; AVX512DQ-NEXT:    ## kill: def $xmm2 killed $xmm2 def $zmm2
; AVX512DQ-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1
; AVX512DQ-NEXT:    vpcmpleq %zmm3, %zmm2, %k0 {%k1}
; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test5:
; X86:       ## %bb.0:
; X86-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1
; X86-NEXT:    vpcmpleq %xmm3, %xmm2, %k0 {%k1}
; X86-NEXT:    vpmovm2q %k0, %xmm0
; X86-NEXT:    retl
  %x_gt_y = icmp slt <2 x i64> %x, %y
  %x1_gt_y1 = icmp sgt <2 x i64> %x1, %y1
  %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
  %resse = sext <2 x i1>%res to <2 x i64>
  ret <2 x i64> %resse
}

; Masks a <16 x i1> argument with an alternating true/false constant, bitcasts
; the result to i16 and branches on whether it is zero. Both successors simply
; return void.
; NOTE(review): this function has no autogenerated assertions. Its `define`
; used to share a line with the closing brace of test5, which presumably kept
; the update script from recognising it. Re-run utils/update_llc_test_checks.py
; on this file to generate the missing assertions.
define void @test6(<16 x i1> %mask)  {
allocas:
  %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <16 x i1> %a to i16
  %c = icmp eq i16 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}
; Ors a <8 x i1> argument with an alternating true/false constant, bitcasts the
; result to i8 and branches on whether it is zero. Both successors return void.
; The expected code materialises the mask in eax and applies the constant as
; `orb $85, %al` (85 = 0x55) before a single return; how the vector argument is
; turned into a mask differs per feature set.
define void @test7(<8 x i1> %mask)  {
; KNL-LABEL: test7:
; KNL:       ## %bb.0: ## %allocas
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    orb $85, %al
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test7:
; SKX:       ## %bb.0: ## %allocas
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    orb $85, %al
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test7:
; AVX512BW:       ## %bb.0: ## %allocas
; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    orb $85, %al
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test7:
; AVX512DQ:       ## %bb.0: ## %allocas
; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    orb $85, %al
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test7:
; X86:       ## %bb.0: ## %allocas
; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
; X86-NEXT:    vpmovw2m %xmm0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    orb $85, %al
; X86-NEXT:    retl
allocas:
  %a= or <8 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  %b = bitcast <8 x i1> %a to i8
  %c = icmp eq i8 %b, 0
  br i1 %c, label %true, label %false

true:
  ret void

false:
  ret void
}
; Selects between two <16 x i1> compare results using a scalar condition
; (%a1 > %b1, signed) and sign-extends the chosen mask to <16 x i8>.
; The first candidate is `%a > 0` (signed); the second is `%b < 0` (unsigned),
; and the expected code for that path simply zeroes a mask register with kxorw.
; All runs expect a branch on the scalar compare; how the mask is expanded to
; bytes differs per feature set.
define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test8:
; KNL:       ## %bb.0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB17_1
; KNL-NEXT:  ## %bb.2:
; KNL-NEXT:    kxorw %k0, %k0, %k1
; KNL-NEXT:    jmp LBB17_3
; KNL-NEXT:  LBB17_1:
; KNL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; KNL-NEXT:  LBB17_3:
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test8:
; SKX:       ## %bb.0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB17_1
; SKX-NEXT:  ## %bb.2:
; SKX-NEXT:    kxorw %k0, %k0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
; SKX-NEXT:  LBB17_1:
; SKX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test8:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    cmpl %esi, %edi
; AVX512BW-NEXT:    jg LBB17_1
; AVX512BW-NEXT:  ## %bb.2:
; AVX512BW-NEXT:    kxorw %k0, %k0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
; AVX512BW-NEXT:  LBB17_1:
; AVX512BW-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test8:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    cmpl %esi, %edi
; AVX512DQ-NEXT:    jg LBB17_1
; AVX512DQ-NEXT:  ## %bb.2:
; AVX512DQ-NEXT:    kxorw %k0, %k0, %k0
; AVX512DQ-NEXT:    jmp LBB17_3
; AVX512DQ-NEXT:  LBB17_1:
; AVX512DQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512DQ-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT:  LBB17_3:
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test8:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jg LBB17_1
; X86-NEXT:  ## %bb.2:
; X86-NEXT:    kxorw %k0, %k0, %k0
; X86-NEXT:    vpmovm2b %k0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
; X86-NEXT:  LBB17_1:
; X86-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; X86-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; X86-NEXT:    vpmovm2b %k0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %cond = icmp sgt i32 %a1, %b1
  %cmp1 = icmp sgt <16 x i32> %a, zeroinitializer
  ; An unsigned value is never below zero, so this compare yields no set lanes.
  %cmp2 = icmp ult <16 x i32> %b, zeroinitializer
  %mix = select i1 %cond, <16 x i1> %cmp1, <16 x i1> %cmp2
  %res = sext <16 x i1> %mix to <16 x i8>
  ret <16 x i8> %res
}
; Selects one of two <16 x i1> arguments using a scalar condition
; (%a1 > %b1, signed) and returns the chosen mask. All runs expect a branch
; that picks xmm0 or xmm1, followed by a conversion of the chosen vector into
; a mask register and back into a vector; the instructions used for that
; conversion differ per feature set.
define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test9:
; KNL:       ## %bb.0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB18_1
; KNL-NEXT:  ## %bb.2:
; KNL-NEXT:    vpmovsxbd %xmm1, %zmm0
; KNL-NEXT:    jmp LBB18_3
; KNL-NEXT:  LBB18_1:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:  LBB18_3:
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test9:
; SKX:       ## %bb.0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB18_1
; SKX-NEXT:  ## %bb.2:
; SKX-NEXT:    vpsllw $7, %xmm1, %xmm0
; SKX-NEXT:    jmp LBB18_3
; SKX-NEXT:  LBB18_1:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:  LBB18_3:
; SKX-NEXT:    vpmovb2m %xmm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test9:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    cmpl %esi, %edi
; AVX512BW-NEXT:    jg LBB18_1
; AVX512BW-NEXT:  ## %bb.2:
; AVX512BW-NEXT:    vpsllw $7, %xmm1, %xmm0
; AVX512BW-NEXT:    jmp LBB18_3
; AVX512BW-NEXT:  LBB18_1:
; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX512BW-NEXT:  LBB18_3:
; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test9:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    cmpl %esi, %edi
; AVX512DQ-NEXT:    jg LBB18_1
; AVX512DQ-NEXT:  ## %bb.2:
; AVX512DQ-NEXT:    vpmovsxbd %xmm1, %zmm0
; AVX512DQ-NEXT:    jmp LBB18_3
; AVX512DQ-NEXT:  LBB18_1:
; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT:  LBB18_3:
; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test9:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jg LBB18_1
; X86-NEXT:  ## %bb.2:
; X86-NEXT:    vpsllw $7, %xmm1, %xmm0
; X86-NEXT:    jmp LBB18_3
; X86-NEXT:  LBB18_1:
; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
; X86-NEXT:  LBB18_3:
; X86-NEXT:    vpmovb2m %xmm0, %k0
; X86-NEXT:    vpmovm2b %k0, %xmm0
; X86-NEXT:    retl
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
  ret <16 x i1>%c
}

; <8 x i1> variant of test9: selects one of two mask arguments using the scalar
; condition %a1 > %b1 (signed) and returns it.
; NOTE(review): this function has no autogenerated assertions. Its `define`
; used to share a line with the closing brace of test9, which presumably kept
; the update script from recognising it. Re-run utils/update_llc_test_checks.py
; on this file to generate the missing assertions.
define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
  ret <8 x i1>%c
}

; <4 x i1> variant of the scalar-conditioned mask select: returns %a when
; %a1 > %b1 (signed) and %b otherwise. All runs expect a branch that shifts
; the chosen xmm register with vpslld $31, followed by a conversion into a
; mask register and back into a vector; the conversion instructions differ per
; feature set.
define <4 x i1> @test11(<4 x i1>%a, <4 x i1>%b, i32 %a1, i32 %b1) {
; KNL-LABEL: test11:
; KNL:       ## %bb.0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    jg LBB20_1
; KNL-NEXT:  ## %bb.2:
; KNL-NEXT:    vpslld $31, %xmm1, %xmm0
; KNL-NEXT:    jmp LBB20_3
; KNL-NEXT:  LBB20_1:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:  LBB20_3:
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test11:
; SKX:       ## %bb.0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    jg LBB20_1
; SKX-NEXT:  ## %bb.2:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm0
; SKX-NEXT:    jmp LBB20_3
; SKX-NEXT:  LBB20_1:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:  LBB20_3:
; SKX-NEXT:    vpmovd2m %xmm0, %k0
; SKX-NEXT:    vpmovm2d %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test11:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    cmpl %esi, %edi
; AVX512BW-NEXT:    jg LBB20_1
; AVX512BW-NEXT:  ## %bb.2:
; AVX512BW-NEXT:    vpslld $31, %xmm1, %xmm0
; AVX512BW-NEXT:    jmp LBB20_3
; AVX512BW-NEXT:  LBB20_1:
; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512BW-NEXT:  LBB20_3:
; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test11:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    cmpl %esi, %edi
; AVX512DQ-NEXT:    jg LBB20_1
; AVX512DQ-NEXT:  ## %bb.2:
; AVX512DQ-NEXT:    vpslld $31, %xmm1, %xmm0
; AVX512DQ-NEXT:    jmp LBB20_3
; AVX512DQ-NEXT:  LBB20_1:
; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT:  LBB20_3:
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test11:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    jg LBB20_1
; X86-NEXT:  ## %bb.2:
; X86-NEXT:    vpslld $31, %xmm1, %xmm0
; X86-NEXT:    jmp LBB20_3
; X86-NEXT:  LBB20_1:
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:  LBB20_3:
; X86-NEXT:    vpmovd2m %xmm0, %k0
; X86-NEXT:    vpmovm2d %k0, %xmm0
; X86-NEXT:    retl
  %mask = icmp sgt i32 %a1, %b1
  %c = select i1 %mask, <4 x i1>%a, <4 x i1>%b
  ret <4 x i1>%c
}

; Constant-folding test: the i16 constant 21845 (0x5555) is viewed as
; <16 x i1>, lane 0 is extracted and used as a select condition. Lane 0 of that
; pattern is set, so the expected code just returns %x with no mask
; instructions at all.
define i32 @test12(i32 %x, i32 %y)  {
; CHECK-LABEL: test12:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test12:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 0
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; Counterpart of test12 that extracts lane 3 of the constant 21845 (0x5555).
; That lane is clear, so the select folds to %y; the 64-bit runs expect a plain
; move of esi into eax. The 32-bit assertions only match a stack load and do
; not distinguish which argument is read.
define i32 @test13(i32 %x, i32 %y)  {
; CHECK-LABEL: test13:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test13:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 3
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; Make sure we don't crash on a large vector.
; An i128 constant is bitcast to <128 x i1>, which is wider than any of the
; 8/16-lane masks used elsewhere in this file, and lane 3 is extracted as a
; select condition. The expected code is fully folded: the 64-bit runs simply
; return %x (edi).
define i32 @test13_crash(i32 %x, i32 %y)  {
; CHECK-LABEL: test13_crash:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test13_crash:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
  %a = bitcast i128 2184568686868686868686868686 to <128 x i1>
  %b = extractelement <128 x i1> %a, i32 3
  %c = select i1 %b, i32 %x, i32 %y
  ret i32 %c
}

; Constant-folding test mixing extract and insert: lane 2 of the constant
; 21845 (0x5555) is extracted and inserted at index 1 of the constant vector
; <true, false, false, true>. Lane 2 of that pattern is set, so the expected
; code is a single load of the folded constant [1,1,0,1].
define <4 x i1> @test14()  {
; CHECK-LABEL: test14:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,0,1]
; CHECK-NEXT:    retq
;
; X86-LABEL: test14:
; X86:       ## %bb.0:
; X86-NEXT:    vmovaps {{.*#+}} xmm0 = [1,1,0,1]
; X86-NEXT:    retl
  %a = bitcast i16 21845 to <16 x i1>
  %b = extractelement <16 x i1> %a, i32 2
  %c = insertelement <4 x i1> <i1 true, i1 false, i1 false, i1 true>, i1 %b, i32 1
  ret <4 x i1> %c
}

; Selects between two constant 16-bit masks (21845 = 0x5555 and 1) using the
; scalar condition %x > %y (signed) and returns the result as <16 x i1>.
; Every run expects the choice to be made on scalar immediates with cmovgl,
; after which the chosen value is moved into a mask register and expanded to a
; byte vector; the expansion instructions differ per feature set.
define <16 x i1> @test15(i32 %x, i32 %y)  {
; KNL-LABEL: test15:
; KNL:       ## %bb.0:
; KNL-NEXT:    cmpl %esi, %edi
; KNL-NEXT:    movl $21845, %eax ## imm = 0x5555
; KNL-NEXT:    movl $1, %ecx
; KNL-NEXT:    cmovgl %eax, %ecx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test15:
; SKX:       ## %bb.0:
; SKX-NEXT:    cmpl %esi, %edi
; SKX-NEXT:    movl $21845, %eax ## imm = 0x5555
; SKX-NEXT:    movl $1, %ecx
; SKX-NEXT:    cmovgl %eax, %ecx
; SKX-NEXT:    kmovd %ecx, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test15:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    cmpl %esi, %edi
; AVX512BW-NEXT:    movl $21845, %eax ## imm = 0x5555
; AVX512BW-NEXT:    movl $1, %ecx
; AVX512BW-NEXT:    cmovgl %eax, %ecx
; AVX512BW-NEXT:    kmovd %ecx, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test15:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    cmpl %esi, %edi
; AVX512DQ-NEXT:    movl $21845, %eax ## imm = 0x5555
; AVX512DQ-NEXT:    movl $1, %ecx
; AVX512DQ-NEXT:    cmovgl %eax, %ecx
; AVX512DQ-NEXT:    kmovw %ecx, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test15:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $21845, %eax ## imm = 0x5555
; X86-NEXT:    movl $1, %ecx
; X86-NEXT:    cmovgl %eax, %ecx
; X86-NEXT:    kmovd %ecx, %k0
; X86-NEXT:    vpmovm2b %k0, %xmm0
; X86-NEXT:    retl
  %a = bitcast i16 21845 to <16 x i1>
  %b = bitcast i16 1 to <16 x i1>
  %mask = icmp sgt i32 %x, %y
  %c = select i1 %mask, <16 x i1> %a, <16 x i1> %b
  ret <16 x i1> %c
}

; Reinterprets an i64 as a <64 x i1> mask, forces element 5 to true, and
; sign-extends the mask to a <64 x i8> vector.
; The runs with avx512bw operate on a single 64-bit mask register. The KNL and
; AVX512DQ runs instead split the input into four 16-bit masks (shifts by
; 16/32/48) and reassemble the result with vinserti128/vinserti64x4.
; In the 64-bit runs the -33 constant clears bit 5 and the kshiftl/kshiftr pair
; (left by width-1, right by width-6) moves the new bit into position 5; the
; 32-bit run clears bit 5 with two kshift pairs instead of an AND.
define <64 x i8> @test16(i64 %x) {
;
; KNL-LABEL: test16:
; KNL:       ## %bb.0:
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    movl %edi, %ecx
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    shrq $32, %rdi
; KNL-NEXT:    shrq $48, %rax
; KNL-NEXT:    shrl $16, %ecx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    kmovw %eax, %k2
; KNL-NEXT:    kmovw %edi, %k3
; KNL-NEXT:    movw $-33, %ax
; KNL-NEXT:    kmovw %eax, %k4
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movb $1, %al
; KNL-NEXT:    kmovw %eax, %k4
; KNL-NEXT:    kshiftlw $15, %k4, %k4
; KNL-NEXT:    kshiftrw $10, %k4, %k4
; KNL-NEXT:    korw %k4, %k0, %k4
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test16:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovq %rdi, %k0
; SKX-NEXT:    movq $-33, %rax
; SKX-NEXT:    kmovq %rax, %k1
; SKX-NEXT:    kandq %k1, %k0, %k0
; SKX-NEXT:    movb $1, %al
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    kshiftlq $63, %k1, %k1
; SKX-NEXT:    kshiftrq $58, %k1, %k1
; SKX-NEXT:    korq %k1, %k0, %k0
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test16:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovq %rdi, %k0
; AVX512BW-NEXT:    movq $-33, %rax
; AVX512BW-NEXT:    kmovq %rax, %k1
; AVX512BW-NEXT:    kandq %k1, %k0, %k0
; AVX512BW-NEXT:    movb $1, %al
; AVX512BW-NEXT:    kmovd %eax, %k1
; AVX512BW-NEXT:    kshiftlq $63, %k1, %k1
; AVX512BW-NEXT:    kshiftrq $58, %k1, %k1
; AVX512BW-NEXT:    korq %k1, %k0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test16:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    movq %rdi, %rax
; AVX512DQ-NEXT:    movl %edi, %ecx
; AVX512DQ-NEXT:    kmovw %edi, %k1
; AVX512DQ-NEXT:    shrq $32, %rdi
; AVX512DQ-NEXT:    shrq $48, %rax
; AVX512DQ-NEXT:    shrl $16, %ecx
; AVX512DQ-NEXT:    kmovw %ecx, %k0
; AVX512DQ-NEXT:    kmovw %eax, %k2
; AVX512DQ-NEXT:    kmovw %edi, %k3
; AVX512DQ-NEXT:    movw $-33, %ax
; AVX512DQ-NEXT:    kmovw %eax, %k4
; AVX512DQ-NEXT:    kandw %k4, %k1, %k1
; AVX512DQ-NEXT:    movb $1, %al
; AVX512DQ-NEXT:    kmovw %eax, %k4
; AVX512DQ-NEXT:    kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT:    kshiftrw $10, %k4, %k4
; AVX512DQ-NEXT:    korw %k4, %k1, %k1
; AVX512DQ-NEXT:    vpmovm2d %k3, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vpmovm2d %k2, %zmm1
; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm1
; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm2
; AVX512DQ-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test16:
; X86:       ## %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kshiftrq $6, %k0, %k1
; X86-NEXT:    kshiftlq $6, %k1, %k1
; X86-NEXT:    kshiftlq $59, %k0, %k0
; X86-NEXT:    kshiftrq $59, %k0, %k0
; X86-NEXT:    movb $1, %al
; X86-NEXT:    kmovd %eax, %k2
; X86-NEXT:    kshiftlq $63, %k2, %k2
; X86-NEXT:    kshiftrq $58, %k2, %k2
; X86-NEXT:    korq %k2, %k1, %k1
; X86-NEXT:    korq %k1, %k0, %k0
; X86-NEXT:    vpmovm2b %k0, %zmm0
; X86-NEXT:    retl
  %a = bitcast i64 %x to <64 x i1>
  %b = insertelement <64 x i1>%a, i1 true, i32 5
  %c = sext <64 x i1>%b to <64 x i8>
  ret <64 x i8>%c
}

; Reinterprets an i64 as a <64 x i1> mask, replaces element 5 with the result
; of a signed %y > %z compare, and sign-extends the mask to a <64 x i8> vector.
; Unlike an insert of a constant, the inserted bit is produced at run time:
; every run expects cmpl + setg, with the setg result moved into a mask
; register and shifted into bit position 5.
; The runs with avx512bw work on one 64-bit mask register; the KNL and
; AVX512DQ runs split the input into four 16-bit masks (shifts by 16/32/48)
; and reassemble the vector with vinserti128/vinserti64x4.
define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) {
;
; KNL-LABEL: test17:
; KNL:       ## %bb.0:
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    movl %edi, %ecx
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    shrq $32, %rdi
; KNL-NEXT:    shrq $48, %rax
; KNL-NEXT:    shrl $16, %ecx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    kmovw %eax, %k2
; KNL-NEXT:    kmovw %edi, %k3
; KNL-NEXT:    cmpl %edx, %esi
; KNL-NEXT:    setg %al
; KNL-NEXT:    movw $-33, %cx
; KNL-NEXT:    kmovw %ecx, %k4
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    kmovw %eax, %k4
; KNL-NEXT:    kshiftlw $15, %k4, %k4
; KNL-NEXT:    kshiftrw $10, %k4, %k4
; KNL-NEXT:    korw %k4, %k0, %k4
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test17:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovq %rdi, %k0
; SKX-NEXT:    cmpl %edx, %esi
; SKX-NEXT:    setg %al
; SKX-NEXT:    movq $-33, %rcx
; SKX-NEXT:    kmovq %rcx, %k1
; SKX-NEXT:    kandq %k1, %k0, %k0
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    kshiftlq $63, %k1, %k1
; SKX-NEXT:    kshiftrq $58, %k1, %k1
; SKX-NEXT:    korq %k1, %k0, %k0
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test17:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovq %rdi, %k0
; AVX512BW-NEXT:    cmpl %edx, %esi
; AVX512BW-NEXT:    setg %al
; AVX512BW-NEXT:    movq $-33, %rcx
; AVX512BW-NEXT:    kmovq %rcx, %k1
; AVX512BW-NEXT:    kandq %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %eax, %k1
; AVX512BW-NEXT:    kshiftlq $63, %k1, %k1
; AVX512BW-NEXT:    kshiftrq $58, %k1, %k1
; AVX512BW-NEXT:    korq %k1, %k0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test17:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    movq %rdi, %rax
; AVX512DQ-NEXT:    movl %edi, %ecx
; AVX512DQ-NEXT:    kmovw %edi, %k1
; AVX512DQ-NEXT:    shrq $32, %rdi
; AVX512DQ-NEXT:    shrq $48, %rax
; AVX512DQ-NEXT:    shrl $16, %ecx
; AVX512DQ-NEXT:    kmovw %ecx, %k0
; AVX512DQ-NEXT:    kmovw %eax, %k2
; AVX512DQ-NEXT:    kmovw %edi, %k3
; AVX512DQ-NEXT:    cmpl %edx, %esi
; AVX512DQ-NEXT:    setg %al
; AVX512DQ-NEXT:    movw $-33, %cx
; AVX512DQ-NEXT:    kmovw %ecx, %k4
; AVX512DQ-NEXT:    kandw %k4, %k1, %k1
; AVX512DQ-NEXT:    kmovw %eax, %k4
; AVX512DQ-NEXT:    kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT:    kshiftrw $10, %k4, %k4
; AVX512DQ-NEXT:    korw %k4, %k1, %k1
; AVX512DQ-NEXT:    vpmovm2d %k3, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vpmovm2d %k2, %zmm1
; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm1
; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm2
; AVX512DQ-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test17:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k0
; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    setg %al
; X86-NEXT:    kshiftrq $6, %k0, %k1
; X86-NEXT:    kshiftlq $6, %k1, %k1
; X86-NEXT:    kshiftlq $59, %k0, %k0
; X86-NEXT:    kshiftrq $59, %k0, %k0
; X86-NEXT:    kmovd %eax, %k2
; X86-NEXT:    kshiftlq $63, %k2, %k2
; X86-NEXT:    kshiftrq $58, %k2, %k2
; X86-NEXT:    korq %k2, %k1, %k1
; X86-NEXT:    korq %k1, %k0, %k0
; X86-NEXT:    vpmovm2b %k0, %zmm0
; X86-NEXT:    retl
  %a = bitcast i64 %x to <64 x i1>
  %b = icmp sgt i32 %y, %z
  %c = insertelement <64 x i1>%a, i1 %b, i32 5
  %d = sext <64 x i1>%c to <64 x i8>
  ret <64 x i8>%d
}

; Builds an 8-element mask from %a, then overwrites element 7 with bit 8 of %y
; and element 6 with bit 9 of %y. Every run expects the two extracts and
; inserts to be done entirely in mask registers (kshift/kand/kor) before the
; final expansion to the <8 x i1> return vector.
define <8 x i1> @test18(i8 %a, i16 %y) {
; KNL-LABEL: test18:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kmovw %esi, %k1
; KNL-NEXT:    kshiftrw $8, %k1, %k2
; KNL-NEXT:    kshiftrw $9, %k1, %k1
; KNL-NEXT:    movw $-65, %ax
; KNL-NEXT:    kmovw %eax, %k3
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    kshiftlw $6, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kshiftlw $9, %k0, %k0
; KNL-NEXT:    kshiftrw $9, %k0, %k0
; KNL-NEXT:    kshiftlw $7, %k2, %k1
; KNL-NEXT:    korw %k1, %k0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test18:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k0
; SKX-NEXT:    kmovd %esi, %k1
; SKX-NEXT:    kshiftrw $8, %k1, %k2
; SKX-NEXT:    kshiftrw $9, %k1, %k1
; SKX-NEXT:    movb $-65, %al
; SKX-NEXT:    kmovd %eax, %k3
; SKX-NEXT:    kandb %k3, %k0, %k0
; SKX-NEXT:    kshiftlb $6, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kshiftlb $1, %k0, %k0
; SKX-NEXT:    kshiftrb $1, %k0, %k0
; SKX-NEXT:    kshiftlb $7, %k2, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    vpmovm2w %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test18:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k0
; AVX512BW-NEXT:    kmovd %esi, %k1
; AVX512BW-NEXT:    kshiftrw $8, %k1, %k2
; AVX512BW-NEXT:    kshiftrw $9, %k1, %k1
; AVX512BW-NEXT:    movw $-65, %ax
; AVX512BW-NEXT:    kmovd %eax, %k3
; AVX512BW-NEXT:    kandw %k3, %k0, %k0
; AVX512BW-NEXT:    kshiftlw $6, %k1, %k1
; AVX512BW-NEXT:    korw %k1, %k0, %k0
; AVX512BW-NEXT:    kshiftlw $9, %k0, %k0
; AVX512BW-NEXT:    kshiftrw $9, %k0, %k0
; AVX512BW-NEXT:    kshiftlw $7, %k2, %k1
; AVX512BW-NEXT:    korw %k1, %k0, %k0
; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test18:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw %edi, %k0
; AVX512DQ-NEXT:    kmovw %esi, %k1
; AVX512DQ-NEXT:    kshiftrw $8, %k1, %k2
; AVX512DQ-NEXT:    kshiftrw $9, %k1, %k1
; AVX512DQ-NEXT:    movb $-65, %al
; AVX512DQ-NEXT:    kmovw %eax, %k3
; AVX512DQ-NEXT:    kandb %k3, %k0, %k0
; AVX512DQ-NEXT:    kshiftlb $6, %k1, %k1
; AVX512DQ-NEXT:    korb %k1, %k0, %k0
; AVX512DQ-NEXT:    kshiftlb $1, %k0, %k0
; AVX512DQ-NEXT:    kshiftrb $1, %k0, %k0
; AVX512DQ-NEXT:    kshiftlb $7, %k2, %k1
; AVX512DQ-NEXT:    korb %k1, %k0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test18:
; X86:       ## %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kshiftrw $8, %k1, %k2
; X86-NEXT:    kshiftrw $9, %k1, %k1
; X86-NEXT:    movb $-65, %al
; X86-NEXT:    kmovd %eax, %k3
; X86-NEXT:    kandb %k3, %k0, %k0
; X86-NEXT:    kshiftlb $6, %k1, %k1
; X86-NEXT:    korb %k1, %k0, %k0
; X86-NEXT:    kshiftlb $1, %k0, %k0
; X86-NEXT:    kshiftrb $1, %k0, %k0
; X86-NEXT:    kshiftlb $7, %k2, %k1
; X86-NEXT:    korb %k1, %k0, %k0
; X86-NEXT:    vpmovm2w %k0, %xmm0
; X86-NEXT:    retl
  %b = bitcast i8 %a to <8 x i1>
  %b1 = bitcast i16 %y to <16 x i1>
  ; Pull bits 8 and 9 out of the 16-bit mask.
  %el1 = extractelement <16 x i1>%b1, i32 8
  %el2 = extractelement <16 x i1>%b1, i32 9
  ; Bit 8 goes to element 7 and bit 9 to element 6 of the 8-bit mask.
  %c = insertelement <8 x i1>%b, i1 %el1, i32 7
  %d = insertelement <8 x i1>%c, i1 %el2, i32 6
  ret <8 x i1>%d
}
; Vector select with a <32 x i1> mask argument: keeps the lanes of %x where
; %mask is set and zeroes the others.
; The runs with avx512bw turn the mask into a k-register (vpsllw $7 +
; vpmovb2m) and use a zero-masked vmovdqu16. The KNL and AVX512DQ runs widen
; the mask bytes to words, fill each lane from its low bit (shift left then
; arithmetic shift right by 15), and AND the result with %x.
define <32 x i16> @test21(<32 x i16> %x , <32 x i1> %mask) nounwind readnone {
; KNL-LABEL: test21:
; KNL:       ## %bb.0:
; KNL-NEXT:    vextracti128 $1, %ymm1, %xmm2
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; KNL-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
; KNL-NEXT:    vpsllw $15, %ymm2, %ymm2
; KNL-NEXT:    vpsraw $15, %ymm2, %ymm2
; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; KNL-NEXT:    vpandq %zmm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test21:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %ymm1, %ymm1
; SKX-NEXT:    vpmovb2m %ymm1, %k1
; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test21:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllw $7, %ymm1, %ymm1
; AVX512BW-NEXT:    vpmovb2m %zmm1, %k1
; AVX512BW-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test21:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vextracti128 $1, %ymm1, %xmm2
; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; AVX512DQ-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQ-NEXT:    vpsllw $15, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsraw $15, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpsllw $15, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpsraw $15, %ymm2, %ymm2
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpandq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test21:
; X86:       ## %bb.0:
; X86-NEXT:    vpsllw $7, %ymm1, %ymm1
; X86-NEXT:    vpmovb2m %ymm1, %k1
; X86-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Stores a <4 x i1> argument to memory. Every run expects the vector to be
; converted to a mask register and written out as a single byte.
; The runs without avx512vl test the full zmm register and then clear the mask
; bits above bit 3 with a shift-left/shift-right pair before the store; the
; runs with avx512vl convert from xmm and store the mask directly.
define void @test22(<4 x i1> %a, <4 x i1>* %addr) {
; KNL-LABEL: test22:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kshiftlw $12, %k0, %k0
; KNL-NEXT:    kshiftrw $12, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test22:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vpmovd2m %xmm0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test22:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kshiftlw $12, %k0, %k0
; AVX512BW-NEXT:    kshiftrw $12, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test22:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    kshiftlb $4, %k0, %k0
; AVX512DQ-NEXT:    kshiftrb $4, %k0, %k0
; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test22:
; X86:       ## %bb.0:
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovd2m %xmm0, %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovb %k0, (%eax)
; X86-NEXT:    retl
  store <4 x i1> %a, <4 x i1>* %addr
  ret void
}

; Stores a <2 x i1> argument to memory. Every run expects the vector to be
; converted to a mask register and written out as a single byte.
; The runs without avx512vl test the full zmm register and then clear the mask
; bits above bit 1 with a shift-left/shift-right pair before the store; the
; runs with avx512vl convert from xmm and store the mask directly.
define void @test23(<2 x i1> %a, <2 x i1>* %addr) {
; KNL-LABEL: test23:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k0
; KNL-NEXT:    kshiftrw $14, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test23:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vpmovq2m %xmm0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test23:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kshiftlw $14, %k0, %k0
; AVX512BW-NEXT:    kshiftrw $14, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test23:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT:    kshiftlb $6, %k0, %k0
; AVX512DQ-NEXT:    kshiftrb $6, %k0, %k0
; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test23:
; X86:       ## %bb.0:
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    vpmovq2m %xmm0, %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovb %k0, (%eax)
; X86-NEXT:    retl
  store <2 x i1> %a, <2 x i1>* %addr
  ret void
}

; Inverts a <1 x i1> argument (xor with true) and stores it through %ptr.
; The expected code loads the argument into a mask register from a GPR (or
; from the stack in the 32-bit run), applies knotw, keeps only bit 0 with a
; shift-left/shift-right pair, and writes a single byte.
define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
; KNL-LABEL: store_v1i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    knotw %k0, %k0
; KNL-NEXT:    kshiftlw $15, %k0, %k0
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rsi)
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v1i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k0
; SKX-NEXT:    knotw %k0, %k0
; SKX-NEXT:    kshiftlb $7, %k0, %k0
; SKX-NEXT:    kshiftrb $7, %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rsi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: store_v1i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k0
; AVX512BW-NEXT:    knotw %k0, %k0
; AVX512BW-NEXT:    kshiftlw $15, %k0, %k0
; AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rsi)
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: store_v1i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw %edi, %k0
; AVX512DQ-NEXT:    knotw %k0, %k0
; AVX512DQ-NEXT:    kshiftlb $7, %k0, %k0
; AVX512DQ-NEXT:    kshiftrb $7, %k0, %k0
; AVX512DQ-NEXT:    kmovb %k0, (%rsi)
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: store_v1i1:
; X86:       ## %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    knotw %k0, %k0
; X86-NEXT:    kshiftlb $7, %k0, %k0
; X86-NEXT:    kshiftrb $7, %k0, %k0
; X86-NEXT:    kmovb %k0, (%eax)
; X86-NEXT:    retl
  %x = xor <1 x i1> %c, <i1 1>
  store <1 x i1> %x, <1 x i1>*  %ptr, align 4
  ret void
}

; Inverts a <2 x i1> argument (xor with all-ones) and stores it through %ptr.
; The KNL and AVX512BW runs fold the inversion into the vector-to-mask
; conversion (vptestnmq); the other runs convert with vpmovq2m and then apply
; knotw. All runs clear the mask bits above bit 1 with a shift-left/shift-right
; pair and write a single byte.
define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
; KNL-LABEL: store_v2i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kshiftlw $14, %k0, %k0
; KNL-NEXT:    kshiftrw $14, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v2i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vpmovq2m %xmm0, %k0
; SKX-NEXT:    knotw %k0, %k0
; SKX-NEXT:    kshiftlb $6, %k0, %k0
; SKX-NEXT:    kshiftrb $6, %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: store_v2i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kshiftlw $14, %k0, %k0
; AVX512BW-NEXT:    kshiftrw $14, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: store_v2i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpsllq $63, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT:    knotw %k0, %k0
; AVX512DQ-NEXT:    kshiftlb $6, %k0, %k0
; AVX512DQ-NEXT:    kshiftrb $6, %k0, %k0
; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: store_v2i1:
; X86:       ## %bb.0:
; X86-NEXT:    vpsllq $63, %xmm0, %xmm0
; X86-NEXT:    vpmovq2m %xmm0, %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    knotw %k0, %k0
; X86-NEXT:    kshiftlb $6, %k0, %k0
; X86-NEXT:    kshiftrb $6, %k0, %k0
; X86-NEXT:    kmovb %k0, (%eax)
; X86-NEXT:    retl
  %x = xor <2 x i1> %c, <i1 1, i1 1>
  store <2 x i1> %x, <2 x i1>*  %ptr, align 4
  ret void
}

; Inverts a <4 x i1> argument (xor with all-ones) and stores it through %ptr.
; The KNL and AVX512BW runs fold the inversion into the vector-to-mask
; conversion (vptestnmd); the other runs convert with vpmovd2m and then apply
; knotw. All runs clear the mask bits above bit 3 with a shift-left/shift-right
; pair and write a single byte.
define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
; KNL-LABEL: store_v4i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kshiftlw $12, %k0, %k0
; KNL-NEXT:    kshiftrw $12, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v4i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vpmovd2m %xmm0, %k0
; SKX-NEXT:    knotw %k0, %k0
; SKX-NEXT:    kshiftlb $4, %k0, %k0
; SKX-NEXT:    kshiftrb $4, %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: store_v4i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kshiftlw $12, %k0, %k0
; AVX512BW-NEXT:    kshiftrw $12, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: store_v4i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    knotw %k0, %k0
; AVX512DQ-NEXT:    kshiftlb $4, %k0, %k0
; AVX512DQ-NEXT:    kshiftrb $4, %k0, %k0
; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: store_v4i1:
; X86:       ## %bb.0:
; X86-NEXT:    vpslld $31, %xmm0, %xmm0
; X86-NEXT:    vpmovd2m %xmm0, %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    knotw %k0, %k0
; X86-NEXT:    kshiftlb $4, %k0, %k0
; X86-NEXT:    kshiftrb $4, %k0, %k0
; X86-NEXT:    kmovb %k0, (%eax)
; X86-NEXT:    retl
  %x = xor <4 x i1> %c, <i1 1, i1 1, i1 1, i1 1>
  store <4 x i1> %x, <4 x i1>*  %ptr, align 4
  ret void
}

; Inverts an <8 x i1> argument (xor with all-ones) and stores it through %ptr
; as a single byte. None of the runs emit a bit-clearing shift pair here.
; The KNL run folds the inversion into vptestnmq; the other runs convert the
; vector to a mask and invert it with knotb (knotw on the AVX512BW run, which
; then stores through a GPR with movb).
define void @store_v8i1(<8 x i1> %c , <8 x i1>* %ptr) {
; KNL-LABEL: store_v8i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v8i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    knotb %k0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: store_v8i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
; AVX512BW-NEXT:    knotw %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: store_v8i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT:    knotb %k0, %k0
; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: store_v8i1:
; X86:       ## %bb.0:
; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
; X86-NEXT:    vpmovw2m %xmm0, %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    knotb %k0, %k0
; X86-NEXT:    kmovb %k0, (%eax)
; X86-NEXT:    retl
  %x = xor <8 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  store <8 x i1> %x, <8 x i1>*  %ptr, align 4
  ret void
}

; Inverts a <16 x i1> argument (xor with all-ones) and stores it through %ptr.
; Every run expects the mask to be written straight from a k-register with a
; 16-bit kmovw store. The KNL run folds the inversion into vptestnmd; the
; other runs convert the vector to a mask and invert it with knotw.
define void @store_v16i1(<16 x i1> %c , <16 x i1>* %ptr) {
; KNL-LABEL: store_v16i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: store_v16i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k0
; SKX-NEXT:    knotw %k0, %k0
; SKX-NEXT:    kmovw %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: store_v16i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
; AVX512BW-NEXT:    knotw %k0, %k0
; AVX512BW-NEXT:    kmovw %k0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: store_v16i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    knotw %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: store_v16i1:
; X86:       ## %bb.0:
; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
; X86-NEXT:    vpmovb2m %xmm0, %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    knotw %k0, %k0
; X86-NEXT:    kmovw %k0, (%eax)
; X86-NEXT:    retl
  %x = xor <16 x i1> %c, <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>
  store <16 x i1> %x, <16 x i1>*  %ptr, align 4
  ret void
}

;void f2(int);
;void f1(int c)
;{
;  static int v = 0;
;  if (v == 0)
;    v = 1;
;  else
;    v = 0;
;  f2(v);
;}

; i1 global toggled by @f1 below; presumably the function-local static `v`
; from the C snippet, narrowed to a single bit.
@f1.v = internal unnamed_addr global i1 false, align 4

; Loads the i1 global, flips it, stores it back and passes the new value
; zero-extended to i32 to @f2. The parameter %c is unused.
; The 64-bit runs expect a byte load/xor/byte store followed by a tail call
; (jmp); the i686 run expects the argument to be written to the stack and a
; regular call/return sequence.
define void @f1(i32 %c) {
; CHECK-LABEL: f1:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movzbl _f1.v(%rip), %edi
; CHECK-NEXT:    xorl $1, %edi
; CHECK-NEXT:    movb %dil, _f1.v(%rip)
; CHECK-NEXT:    jmp _f2 ## TAILCALL
;
; X86-LABEL: f1:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    movzbl _f1.v, %eax
; X86-NEXT:    xorl $1, %eax
; X86-NEXT:    movb %al, _f1.v
; X86-NEXT:    movl %eax, (%esp)
; X86-NEXT:    calll _f2
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    retl
entry:
  %.b1 = load i1, i1* @f1.v, align 4
  %not..b1 = xor i1 %.b1, true
  store i1 %not..b1, i1* @f1.v, align 4
  %0 = zext i1 %not..b1 to i32
  tail call void @f2(i32 %0) #2
  ret void
}

; External callee of @f1. Attribute group #1 is defined outside this part of
; the file.
declare void @f2(i32) #1

; Truncates an i16 to i1 and stores it through %y.
; All runs expect the value to be masked with 1 in a 32-bit register and
; written back as a single byte; no mask registers are involved.
define void @store_i16_i1(i16 %x, i1 *%y) {
; CHECK-LABEL: store_i16_i1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    movb %dil, (%rsi)
; CHECK-NEXT:    retq
;
; X86-LABEL: store_i16_i1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    andl $1, %ecx
; X86-NEXT:    movb %cl, (%eax)
; X86-NEXT:    retl
  %c = trunc i16 %x to i1
  store i1 %c, i1* %y
  ret void
}

; Truncates an i8 to i1 and stores it through %y.
; The 64-bit runs expect a 32-bit AND with 1 followed by a byte store; the
; i686 run expects the same operation done with a byte-sized AND.
define void @store_i8_i1(i8 %x, i1 *%y) {
; CHECK-LABEL: store_i8_i1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    andl $1, %edi
; CHECK-NEXT:    movb %dil, (%rsi)
; CHECK-NEXT:    retq
;
; X86-LABEL: store_i8_i1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    andb $1, %cl
; X86-NEXT:    movb %cl, (%eax)
; X86-NEXT:    retl
  %c = trunc i8 %x to i1
  store i1 %c, i1* %y
  ret void
}

; Selects between %x and zero using a constant <32 x i1> condition.
; Every run expects the select to collapse into a single AND of %x with a
; constant-pool vector (vpandq or vandps depending on the feature set); no
; mask register is expected.
define <32 x i16> @test_build_vec_v32i1(<32 x i16> %x) {
; KNL-LABEL: test_build_vec_v32i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_build_vec_v32i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test_build_vec_v32i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_build_vec_v32i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test_build_vec_v32i1:
; X86:       ## %bb.0:
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
; X86-NEXT:    retl
  %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Same constant-mask select as @test_build_vec_v32i1, but on a function
; marked optsize.
; Runs that enable +avx512bw expect the 32-bit mask to be materialised as an
; immediate, moved into a k-register and applied with a zero-masking
; vmovdqu16. Runs without +avx512bw still expect the AND with a
; constant-pool vector.
define <32 x i16> @test_build_vec_v32i1_optsize(<32 x i16> %x) optsize {
; KNL-LABEL: test_build_vec_v32i1_optsize:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_build_vec_v32i1_optsize:
; SKX:       ## %bb.0:
; SKX-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test_build_vec_v32i1_optsize:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
; AVX512BW-NEXT:    kmovd %eax, %k1
; AVX512BW-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_build_vec_v32i1_optsize:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test_build_vec_v32i1_optsize:
; X86:       ## %bb.0:
; X86-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
  %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Same constant-mask select as @test_build_vec_v32i1, but the function
; carries profile metadata (!prof !14) instead of an attribute.
; NOTE(review): !14 is defined outside this part of the file; going by the
; function name it presumably triggers profile-guided size optimization --
; confirm against the metadata definition.
; The expected output for each run matches the optsize variant: immediate +
; k-register + zero-masked vmovdqu16 where +avx512bw is enabled, a
; constant-pool AND otherwise.
define <32 x i16> @test_build_vec_v32i1_pgso(<32 x i16> %x) !prof !14 {
; KNL-LABEL: test_build_vec_v32i1_pgso:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_build_vec_v32i1_pgso:
; SKX:       ## %bb.0:
; SKX-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
; SKX-NEXT:    kmovd %eax, %k1
; SKX-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test_build_vec_v32i1_pgso:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
; AVX512BW-NEXT:    kmovd %eax, %k1
; AVX512BW-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_build_vec_v32i1_pgso:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test_build_vec_v32i1_pgso:
; X86:       ## %bb.0:
; X86-NEXT:    movl $1497715861, %eax ## imm = 0x59455495
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
  %ret = select <32 x i1> <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <32 x i16> %x, <32 x i16> zeroinitializer
  ret <32 x i16> %ret
}

; Selects between %x and zero using a constant <64 x i1> condition on byte
; elements.
; Every run expects a single AND of %x with a constant-pool vector rather
; than a materialised 64-bit mask.
define <64 x i8> @test_build_vec_v64i1(<64 x i8> %x) {
; KNL-LABEL: test_build_vec_v64i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: test_build_vec_v64i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test_build_vec_v64i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_build_vec_v64i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test_build_vec_v64i1:
; X86:       ## %bb.0:
; X86-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
; X86-NEXT:    retl
  %ret = select <64 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false>, <64 x i8> %x, <64 x i8> zeroinitializer
  ret <64 x i8> %ret
}

; Branches on whether the AND of two 8-lane compare masks is all-zero.
;   %sel1 = %in > load(%base)                (ordered greater-than)
;   %val3 = %sel1 ? load(%base + 1 double) : 0
;   %sel2 = %in < %val3                      (ordered less-than)
; The AND of %sel1 and %sel2 is bitcast to i8 and compared with zero; %in is
; stored to the second address when the mask is zero (L2) and to the first
; address otherwise (L1). The loads use align 1, the stores carry no explicit
; alignment.
; Runs with +avx512dq expect the AND-and-test to be done by a single ktestb
; on the two masks. Runs without it expect the AND to be folded into a masked
; compare, followed by a move to a GPR and testb.
define void @ktest_1(<8 x double> %in, double * %base) {
; KNL-LABEL: ktest_1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vcmpgtpd (%rdi), %zmm0, %k1
; KNL-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
; KNL-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    je LBB44_2
; KNL-NEXT:  ## %bb.1: ## %L1
; KNL-NEXT:    vmovapd %zmm0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
; KNL-NEXT:  LBB44_2: ## %L2
; KNL-NEXT:    vmovapd %zmm0, 8(%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: ktest_1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vcmpgtpd (%rdi), %zmm0, %k1
; SKX-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
; SKX-NEXT:    vcmpltpd %zmm1, %zmm0, %k0
; SKX-NEXT:    ktestb %k0, %k1
; SKX-NEXT:    je LBB44_2
; SKX-NEXT:  ## %bb.1: ## %L1
; SKX-NEXT:    vmovapd %zmm0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
; SKX-NEXT:  LBB44_2: ## %L2
; SKX-NEXT:    vmovapd %zmm0, 8(%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: ktest_1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vcmpgtpd (%rdi), %zmm0, %k1
; AVX512BW-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
; AVX512BW-NEXT:    vcmpltpd %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    testb %al, %al
; AVX512BW-NEXT:    je LBB44_2
; AVX512BW-NEXT:  ## %bb.1: ## %L1
; AVX512BW-NEXT:    vmovapd %zmm0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
; AVX512BW-NEXT:  LBB44_2: ## %L2
; AVX512BW-NEXT:    vmovapd %zmm0, 8(%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: ktest_1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vcmpgtpd (%rdi), %zmm0, %k1
; AVX512DQ-NEXT:    vmovupd 8(%rdi), %zmm1 {%k1} {z}
; AVX512DQ-NEXT:    vcmpltpd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT:    ktestb %k0, %k1
; AVX512DQ-NEXT:    je LBB44_2
; AVX512DQ-NEXT:  ## %bb.1: ## %L1
; AVX512DQ-NEXT:    vmovapd %zmm0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
; AVX512DQ-NEXT:  LBB44_2: ## %L2
; AVX512DQ-NEXT:    vmovapd %zmm0, 8(%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: ktest_1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcmpgtpd (%eax), %zmm0, %k1
; X86-NEXT:    vmovupd 8(%eax), %zmm1 {%k1} {z}
; X86-NEXT:    vcmpltpd %zmm1, %zmm0, %k0
; X86-NEXT:    ktestb %k0, %k1
; X86-NEXT:    je LBB44_2
; X86-NEXT:  ## %bb.1: ## %L1
; X86-NEXT:    vmovapd %zmm0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
; X86-NEXT:  LBB44_2: ## %L2
; X86-NEXT:    vmovapd %zmm0, 8(%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %addr1 = getelementptr double, double * %base, i64 0
  %addr2 = getelementptr double, double * %base, i64 1

  %vaddr1 = bitcast double* %addr1 to <8 x double>*
  %vaddr2 = bitcast double* %addr2 to <8 x double>*

  %val1 = load <8 x double>, <8 x double> *%vaddr1, align 1
  %val2 = load <8 x double>, <8 x double> *%vaddr2, align 1

  %sel1 = fcmp ogt <8 x double>%in, %val1
  %val3 = select <8 x i1> %sel1, <8 x double> %val2, <8 x double> zeroinitializer
  %sel2 = fcmp olt <8 x double> %in, %val3
  %sel3 = and <8 x i1> %sel1, %sel2

  %int_sel3 = bitcast <8 x i1> %sel3 to i8
  %res = icmp eq i8 %int_sel3, zeroinitializer
  br i1 %res, label %L2, label %L1
L1:
  store <8 x double> %in, <8 x double>* %vaddr1
  br label %End
L2:
  store <8 x double> %in, <8 x double>* %vaddr2
  br label %End
End:
  ret void
}

; 32-lane float counterpart of @ktest_1 that combines the two compare masks
; with OR instead of AND.
;   %sel1 = %in > load(%base)
;   %val3 = %sel1 ? load(%base + 1 float) : 0
;   %sel2 = %in < %val3
; The OR of %sel1 and %sel2 is bitcast to i32 and compared with zero; %in is
; stored to the second address when the mask is zero (L2) and to the first
; address otherwise (L1).
; Runs with +avx512bw expect each pair of 16-bit half masks to be joined with
; kunpckwd and tested with one kortestd. Runs without it expect the halves to
; be OR-ed separately with korw and tested with kortestw.
define void @ktest_2(<32 x float> %in, float * %base) {
;
; KNL-LABEL: ktest_2:
; KNL:       ## %bb.0:
; KNL-NEXT:    vcmpgtps (%rdi), %zmm0, %k1
; KNL-NEXT:    vcmpgtps 64(%rdi), %zmm1, %k2
; KNL-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z}
; KNL-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z}
; KNL-NEXT:    vcmpltps %zmm3, %zmm0, %k0
; KNL-NEXT:    vcmpltps %zmm2, %zmm1, %k3
; KNL-NEXT:    korw %k3, %k2, %k2
; KNL-NEXT:    korw %k0, %k1, %k0
; KNL-NEXT:    kortestw %k2, %k0
; KNL-NEXT:    je LBB45_2
; KNL-NEXT:  ## %bb.1: ## %L1
; KNL-NEXT:    vmovaps %zmm0, (%rdi)
; KNL-NEXT:    vmovaps %zmm1, 64(%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
; KNL-NEXT:  LBB45_2: ## %L2
; KNL-NEXT:    vmovaps %zmm0, 4(%rdi)
; KNL-NEXT:    vmovaps %zmm1, 68(%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: ktest_2:
; SKX:       ## %bb.0:
; SKX-NEXT:    vcmpgtps (%rdi), %zmm0, %k1
; SKX-NEXT:    vcmpgtps 64(%rdi), %zmm1, %k2
; SKX-NEXT:    kunpckwd %k1, %k2, %k0
; SKX-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z}
; SKX-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z}
; SKX-NEXT:    vcmpltps %zmm3, %zmm0, %k1
; SKX-NEXT:    vcmpltps %zmm2, %zmm1, %k2
; SKX-NEXT:    kunpckwd %k1, %k2, %k1
; SKX-NEXT:    kortestd %k1, %k0
; SKX-NEXT:    je LBB45_2
; SKX-NEXT:  ## %bb.1: ## %L1
; SKX-NEXT:    vmovaps %zmm0, (%rdi)
; SKX-NEXT:    vmovaps %zmm1, 64(%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
; SKX-NEXT:  LBB45_2: ## %L2
; SKX-NEXT:    vmovaps %zmm0, 4(%rdi)
; SKX-NEXT:    vmovaps %zmm1, 68(%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: ktest_2:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vcmpgtps (%rdi), %zmm0, %k1
; AVX512BW-NEXT:    vcmpgtps 64(%rdi), %zmm1, %k2
; AVX512BW-NEXT:    kunpckwd %k1, %k2, %k0
; AVX512BW-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z}
; AVX512BW-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z}
; AVX512BW-NEXT:    vcmpltps %zmm3, %zmm0, %k1
; AVX512BW-NEXT:    vcmpltps %zmm2, %zmm1, %k2
; AVX512BW-NEXT:    kunpckwd %k1, %k2, %k1
; AVX512BW-NEXT:    kortestd %k1, %k0
; AVX512BW-NEXT:    je LBB45_2
; AVX512BW-NEXT:  ## %bb.1: ## %L1
; AVX512BW-NEXT:    vmovaps %zmm0, (%rdi)
; AVX512BW-NEXT:    vmovaps %zmm1, 64(%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
; AVX512BW-NEXT:  LBB45_2: ## %L2
; AVX512BW-NEXT:    vmovaps %zmm0, 4(%rdi)
; AVX512BW-NEXT:    vmovaps %zmm1, 68(%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: ktest_2:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vcmpgtps (%rdi), %zmm0, %k1
; AVX512DQ-NEXT:    vcmpgtps 64(%rdi), %zmm1, %k2
; AVX512DQ-NEXT:    vmovups 68(%rdi), %zmm2 {%k2} {z}
; AVX512DQ-NEXT:    vmovups 4(%rdi), %zmm3 {%k1} {z}
; AVX512DQ-NEXT:    vcmpltps %zmm3, %zmm0, %k0
; AVX512DQ-NEXT:    vcmpltps %zmm2, %zmm1, %k3
; AVX512DQ-NEXT:    korw %k3, %k2, %k2
; AVX512DQ-NEXT:    korw %k0, %k1, %k0
; AVX512DQ-NEXT:    kortestw %k2, %k0
; AVX512DQ-NEXT:    je LBB45_2
; AVX512DQ-NEXT:  ## %bb.1: ## %L1
; AVX512DQ-NEXT:    vmovaps %zmm0, (%rdi)
; AVX512DQ-NEXT:    vmovaps %zmm1, 64(%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
; AVX512DQ-NEXT:  LBB45_2: ## %L2
; AVX512DQ-NEXT:    vmovaps %zmm0, 4(%rdi)
; AVX512DQ-NEXT:    vmovaps %zmm1, 68(%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: ktest_2:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vcmpgtps (%eax), %zmm0, %k1
; X86-NEXT:    vcmpgtps 64(%eax), %zmm1, %k2
; X86-NEXT:    kunpckwd %k1, %k2, %k0
; X86-NEXT:    vmovups 68(%eax), %zmm2 {%k2} {z}
; X86-NEXT:    vmovups 4(%eax), %zmm3 {%k1} {z}
; X86-NEXT:    vcmpltps %zmm3, %zmm0, %k1
; X86-NEXT:    vcmpltps %zmm2, %zmm1, %k2
; X86-NEXT:    kunpckwd %k1, %k2, %k1
; X86-NEXT:    kortestd %k1, %k0
; X86-NEXT:    je LBB45_2
; X86-NEXT:  ## %bb.1: ## %L1
; X86-NEXT:    vmovaps %zmm0, (%eax)
; X86-NEXT:    vmovaps %zmm1, 64(%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
; X86-NEXT:  LBB45_2: ## %L2
; X86-NEXT:    vmovaps %zmm0, 4(%eax)
; X86-NEXT:    vmovaps %zmm1, 68(%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %addr1 = getelementptr float, float * %base, i64 0
  %addr2 = getelementptr float, float * %base, i64 1

  %vaddr1 = bitcast float* %addr1 to <32 x float>*
  %vaddr2 = bitcast float* %addr2 to <32 x float>*

  %val1 = load <32 x float>, <32 x float> *%vaddr1, align 1
  %val2 = load <32 x float>, <32 x float> *%vaddr2, align 1

  %sel1 = fcmp ogt <32 x float>%in, %val1
  %val3 = select <32 x i1> %sel1, <32 x float> %val2, <32 x float> zeroinitializer
  %sel2 = fcmp olt <32 x float> %in, %val3
  %sel3 = or <32 x i1> %sel1, %sel2

  %int_sel3 = bitcast <32 x i1> %sel3 to i32
  %res = icmp eq i32 %int_sel3, zeroinitializer
  br i1 %res, label %L2, label %L1
L1:
  store <32 x float> %in, <32 x float>* %vaddr1
  br label %End
L2:
  store <32 x float> %in, <32 x float>* %vaddr2
  br label %End
End:
  ret void
}

; Loads a <8 x i1> mask from memory and sign-extends it to <8 x i64>.
; Runs with +avx512dq expect a direct kmovb load into a k-register followed
; by vpmovm2q. Runs without it expect the byte to be loaded through a GPR
; and expanded with a zero-masked vpternlogq.
define <8 x i64> @load_8i1(<8 x i1>* %a) {
; KNL-LABEL: load_8i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: load_8i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    vpmovm2q %k0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: load_8i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    movzbl (%rdi), %eax
; AVX512BW-NEXT:    kmovd %eax, %k1
; AVX512BW-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: load_8i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovb (%rdi), %k0
; AVX512DQ-NEXT:    vpmovm2q %k0, %zmm0
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: load_8i1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovb (%eax), %k0
; X86-NEXT:    vpmovm2q %k0, %zmm0
; X86-NEXT:    retl
  %b = load <8 x i1>, <8 x i1>* %a
  %c = sext <8 x i1> %b to <8 x i64>
  ret <8 x i64> %c
}

; Loads a <16 x i1> mask from memory and sign-extends it to <16 x i32>.
; Every run expects the mask to be loaded directly with kmovw. Runs with
; +avx512dq then expand it with vpmovm2d; the others use a zero-masked
; vpternlogd.
define <16 x i32> @load_16i1(<16 x i1>* %a) {
; KNL-LABEL: load_16i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw (%rdi), %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    retq
;
; SKX-LABEL: load_16i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovw (%rdi), %k0
; SKX-NEXT:    vpmovm2d %k0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: load_16i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovw (%rdi), %k1
; AVX512BW-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: load_16i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw (%rdi), %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: load_16i1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw (%eax), %k0
; X86-NEXT:    vpmovm2d %k0, %zmm0
; X86-NEXT:    retl
  %b = load <16 x i1>, <16 x i1>* %a
  %c = sext <16 x i1> %b to <16 x i32>
  ret <16 x i32> %c
}

; Loads a <2 x i1> mask from memory and sign-extends it to <2 x i16>.
; Every run expects one byte to be read from memory. Runs with both
; +avx512bw and +avx512vl expand it directly into xmm0 with vpmovm2w; the
; other runs are expected to expand in a zmm register (and, without
; +avx512bw, narrow from dwords with vpmovdw) before returning the low xmm.
define <2 x i16> @load_2i1(<2 x i1>* %a) {
; KNL-LABEL: load_2i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: load_2i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    vpmovm2w %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: load_2i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    movzbl (%rdi), %eax
; AVX512BW-NEXT:    kmovd %eax, %k0
; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: load_2i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovb (%rdi), %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: load_2i1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovb (%eax), %k0
; X86-NEXT:    vpmovm2w %k0, %xmm0
; X86-NEXT:    retl
  %b = load <2 x i1>, <2 x i1>* %a
  %c = sext <2 x i1> %b to <2 x i16>
  ret <2 x i16> %c
}

; Loads a <4 x i1> mask from memory and sign-extends it to <4 x i16>.
; The expected instruction sequences are the same as for @load_2i1: a
; one-byte read, then either a direct vpmovm2w into xmm0 (+avx512bw with
; +avx512vl) or an expansion in a zmm register that is narrowed to the low
; xmm.
define <4 x i16> @load_4i1(<4 x i1>* %a) {
; KNL-LABEL: load_4i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: load_4i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    vpmovm2w %k0, %xmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: load_4i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    movzbl (%rdi), %eax
; AVX512BW-NEXT:    kmovd %eax, %k0
; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: load_4i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovb (%rdi), %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: load_4i1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovb (%eax), %k0
; X86-NEXT:    vpmovm2w %k0, %xmm0
; X86-NEXT:    retl
  %b = load <4 x i1>, <4 x i1>* %a
  %c = sext <4 x i1> %b to <4 x i16>
  ret <4 x i16> %c
}

; Loads a <32 x i1> mask from memory and sign-extends it to <32 x i16>.
; Runs with +avx512bw expect a single 32-bit kmovd load and one vpmovm2w.
; Runs without it expect the mask to be read as two 16-bit halves (offsets 0
; and 2), each expanded to dwords, narrowed with vpmovdw and recombined with
; vinserti64x4.
define <32 x i16> @load_32i1(<32 x i1>* %a) {
; KNL-LABEL: load_32i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw (%rdi), %k1
; KNL-NEXT:    kmovw 2(%rdi), %k2
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
; KNL-NEXT:    vpmovdw %zmm1, %ymm1
; KNL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: load_32i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd (%rdi), %k0
; SKX-NEXT:    vpmovm2w %k0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: load_32i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd (%rdi), %k0
; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: load_32i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw (%rdi), %k0
; AVX512DQ-NEXT:    kmovw 2(%rdi), %k1
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm1
; AVX512DQ-NEXT:    vpmovdw %zmm1, %ymm1
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: load_32i1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd (%eax), %k0
; X86-NEXT:    vpmovm2w %k0, %zmm0
; X86-NEXT:    retl
  %b = load <32 x i1>, <32 x i1>* %a
  %c = sext <32 x i1> %b to <32 x i16>
  ret <32 x i16> %c
}

; Loads a <64 x i1> mask from memory and sign-extends it to <64 x i8>.
; Runs with +avx512bw expect a single 64-bit kmovq load and one vpmovm2b
; (including the i686 run). Runs without it expect four 16-bit mask loads
; (offsets 0, 2, 4 and 6), each expanded to dwords, narrowed to bytes with
; vpmovdb and reassembled with vinserti128 / vinserti64x4.
define <64 x i8> @load_64i1(<64 x i1>* %a) {
; KNL-LABEL: load_64i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw (%rdi), %k1
; KNL-NEXT:    kmovw 2(%rdi), %k2
; KNL-NEXT:    kmovw 4(%rdi), %k3
; KNL-NEXT:    kmovw 6(%rdi), %k4
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k3} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k4} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; KNL-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm1, %xmm1
; KNL-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
; KNL-NEXT:    vpmovdb %zmm2, %xmm2
; KNL-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; KNL-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; KNL-NEXT:    retq
;
; SKX-LABEL: load_64i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovq (%rdi), %k0
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: load_64i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovq (%rdi), %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: load_64i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw (%rdi), %k0
; AVX512DQ-NEXT:    kmovw 2(%rdi), %k1
; AVX512DQ-NEXT:    kmovw 4(%rdi), %k2
; AVX512DQ-NEXT:    kmovw 6(%rdi), %k3
; AVX512DQ-NEXT:    vpmovm2d %k2, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vpmovm2d %k3, %zmm1
; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm1
; AVX512DQ-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT:    vpmovm2d %k1, %zmm2
; AVX512DQ-NEXT:    vpmovdb %zmm2, %xmm2
; AVX512DQ-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: load_64i1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovq (%eax), %k0
; X86-NEXT:    vpmovm2b %k0, %zmm0
; X86-NEXT:    retl
  %b = load <64 x i1>, <64 x i1>* %a
  %c = sext <64 x i1> %b to <64 x i8>
  ret <64 x i8> %c
}

; Stores a <8 x i1> argument to memory unchanged.
; The argument is converted from xmm0 to a k-register in every run. Runs
; with +avx512dq expect a direct kmovb store; runs without it expect the
; mask to be moved to a GPR and written with a byte store.
define void @store_8i1(<8 x i1>* %a, <8 x i1> %v) {
; KNL-LABEL: store_8i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: store_8i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: store_8i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: store_8i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: store_8i1:
; X86:       ## %bb.0:
; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
; X86-NEXT:    vpmovw2m %xmm0, %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovb %k0, (%eax)
; X86-NEXT:    retl
  store <8 x i1> %v, <8 x i1>* %a
  ret void
}

; Truncates a <8 x i16> argument to <8 x i1> and stores the mask to memory.
; On the 64-bit runs the expected code is identical to @store_8i1: the low
; bit of each word is moved into a k-register, then stored with kmovb
; (+avx512dq) or via a GPR byte store (no +avx512dq).
define void @store_8i1_1(<8 x i1>* %a, <8 x i16> %v) {
; KNL-LABEL: store_8i1_1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movb %al, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: store_8i1_1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
; SKX-NEXT:    vpmovw2m %xmm0, %k0
; SKX-NEXT:    kmovb %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: store_8i1_1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movb %al, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: store_8i1_1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
; AVX512DQ-NEXT:    kmovb %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: store_8i1_1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
; X86-NEXT:    vpmovw2m %xmm0, %k0
; X86-NEXT:    kmovb %k0, (%eax)
; X86-NEXT:    retl
  %v1 = trunc <8 x i16> %v to <8 x i1>
  store <8 x i1> %v1, <8 x i1>* %a
  ret void
}

; Stores a <16 x i1> argument to memory unchanged.
; Every run expects the argument to be converted from xmm0 into a k-register
; and written with a direct kmovw store; only the vector-to-mask conversion
; differs between feature sets.
define void @store_16i1(<16 x i1>* %a, <16 x i1> %v) {
; KNL-LABEL: store_16i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: store_16i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %xmm0, %xmm0
; SKX-NEXT:    vpmovb2m %xmm0, %k0
; SKX-NEXT:    kmovw %k0, (%rdi)
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: store_16i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
; AVX512BW-NEXT:    kmovw %k0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: store_16i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: store_16i1:
; X86:       ## %bb.0:
; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
; X86-NEXT:    vpmovb2m %xmm0, %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw %k0, (%eax)
; X86-NEXT:    retl
  store <16 x i1> %v, <16 x i1>* %a
  ret void
}

; Stores a <32 x i1> argument to memory unchanged.
; Runs with +avx512bw expect one byte-to-mask conversion and a single 32-bit
; kmovd store. Runs without it expect the argument to be split into two
; 16-lane halves, each converted separately and stored with kmovw at offsets
; 0 and 2.
define void @store_32i1(<32 x i1>* %a, <32 x i1> %v) {
; KNL-LABEL: store_32i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm1
; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT:    vextracti128 $1, %ymm0, %xmm0
; KNL-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    kmovw %k1, 2(%rdi)
; KNL-NEXT:    kmovw %k0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: store_32i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %ymm0, %ymm0
; SKX-NEXT:    vpmovb2m %ymm0, %k0
; SKX-NEXT:    kmovd %k0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: store_32i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllw $7, %ymm0, %ymm0
; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: store_32i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm1
; AVX512DQ-NEXT:    vpslld $31, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovd2m %zmm1, %k0
; AVX512DQ-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX512DQ-NEXT:    vpmovsxbd %xmm0, %zmm0
; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: store_32i1:
; X86:       ## %bb.0:
; X86-NEXT:    vpsllw $7, %ymm0, %ymm0
; X86-NEXT:    vpmovb2m %ymm0, %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %k0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  store <32 x i1> %v, <32 x i1>* %a
  ret void
}

; Truncates a <32 x i16> argument to <32 x i1> and stores the mask to memory.
; Runs with +avx512bw expect one word-to-mask conversion on the full zmm
; register and a single kmovd store. Runs without it expect the vector to be
; split into two 16-lane halves, each widened to dwords, converted, and
; stored with kmovw at offsets 0 and 2.
define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) {
; KNL-LABEL: store_32i1_1:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm1
; KNL-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL-NEXT:    kmovw %k1, 2(%rdi)
; KNL-NEXT:    kmovw %k0, (%rdi)
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: store_32i1_1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $15, %zmm0, %zmm0
; SKX-NEXT:    vpmovw2m %zmm0, %k0
; SKX-NEXT:    kmovd %k0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: store_32i1_1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllw $15, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: store_32i1_1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm1
; AVX512DQ-NEXT:    vpslld $31, %zmm1, %zmm1
; AVX512DQ-NEXT:    vpmovd2m %zmm1, %k0
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k1
; AVX512DQ-NEXT:    kmovw %k1, 2(%rdi)
; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: store_32i1_1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpsllw $15, %zmm0, %zmm0
; X86-NEXT:    vpmovw2m %zmm0, %k0
; X86-NEXT:    kmovd %k0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %v1 = trunc <32 x i16> %v to <32 x i1>
  store <32 x i1> %v1, <32 x i1>* %a
  ret void
}


define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) {
;
; KNL-LABEL: store_64i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    movw $-3, %ax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kmovw %esi, %k0
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k1, %k2
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kmovw %edx, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $14, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-5, %ax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $13, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-9, %ax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw %r8d, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $12, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-17, %ax
; KNL-NEXT:    kmovw %eax, %k6
; KNL-NEXT:    kandw %k6, %k0, %k0
; KNL-NEXT:    kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kmovw %r9d, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $11, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-33, %ax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k1, %k3
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $10, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-65, %ax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $9, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-129, %ax
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k1, %k4
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $8, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-257, %ax ## imm = 0xFEFF
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $7, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-513, %ax ## imm = 0xFDFF
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k1, %k5
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $6, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-1025, %ax ## imm = 0xFBFF
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $5, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-2049, %ax ## imm = 0xF7FF
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $4, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-4097, %ax ## imm = 0xEFFF
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $3, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-8193, %ax ## imm = 0xDFFF
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $2, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-16385, %ax ## imm = 0xBFFF
; KNL-NEXT:    kmovw %eax, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $14, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kshiftlw $1, %k0, %k0
; KNL-NEXT:    kshiftrw $1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $14, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $13, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $12, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k6, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $11, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $10, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT:    kandw %k6, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $9, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $8, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $7, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $6, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $5, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $4, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT:    kandw %k7, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $3, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT:    kandw %k7, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $2, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT:    kandw %k7, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $14, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kshiftlw $1, %k0, %k0
; KNL-NEXT:    kshiftrw $1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT:    kandw %k7, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $14, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $13, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $12, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $11, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $10, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k6, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $9, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $8, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $7, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $6, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $5, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $4, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $3, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $2, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $14, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kshiftlw $1, %k0, %k0
; KNL-NEXT:    kshiftrw $1, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT:    kandw %k5, %k7, %k7
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $14, %k6, %k6
; KNL-NEXT:    korw %k6, %k7, %k6
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT:    kandw %k5, %k6, %k6
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $13, %k7, %k7
; KNL-NEXT:    korw %k7, %k6, %k6
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; KNL-NEXT:    kandw %k5, %k6, %k6
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $12, %k7, %k7
; KNL-NEXT:    korw %k7, %k6, %k6
; KNL-NEXT:    kandw %k1, %k6, %k6
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $11, %k7, %k7
; KNL-NEXT:    korw %k7, %k6, %k6
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k6, %k6
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $10, %k7, %k7
; KNL-NEXT:    korw %k7, %k6, %k6
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k6, %k6
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $9, %k7, %k7
; KNL-NEXT:    korw %k7, %k6, %k6
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k6, %k6
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $8, %k7, %k7
; KNL-NEXT:    korw %k7, %k6, %k6
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k6, %k6
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $7, %k7, %k7
; KNL-NEXT:    korw %k7, %k6, %k6
; KNL-NEXT:    kandw %k3, %k6, %k6
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $6, %k7, %k7
; KNL-NEXT:    korw %k7, %k6, %k6
; KNL-NEXT:    kandw %k4, %k6, %k5
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $5, %k6, %k6
; KNL-NEXT:    korw %k6, %k5, %k5
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k5, %k4
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k5
; KNL-NEXT:    kshiftlw $15, %k5, %k5
; KNL-NEXT:    kshiftrw $4, %k5, %k5
; KNL-NEXT:    korw %k5, %k4, %k4
; KNL-NEXT:    kandw %k2, %k4, %k3
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k4
; KNL-NEXT:    kshiftlw $15, %k4, %k4
; KNL-NEXT:    kshiftrw $3, %k4, %k4
; KNL-NEXT:    korw %k4, %k3, %k3
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k3, %k2
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k3
; KNL-NEXT:    kshiftlw $15, %k3, %k3
; KNL-NEXT:    kshiftrw $2, %k3, %k3
; KNL-NEXT:    korw %k3, %k2, %k2
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k2, %k1
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k2
; KNL-NEXT:    kshiftlw $14, %k2, %k2
; KNL-NEXT:    korw %k2, %k1, %k1
; KNL-NEXT:    kshiftlw $1, %k1, %k1
; KNL-NEXT:    kshiftrw $1, %k1, %k1
; KNL-NEXT:    movb {{[0-9]+}}(%rsp), %al
; KNL-NEXT:    kmovw %eax, %k2
; KNL-NEXT:    kshiftlw $15, %k2, %k2
; KNL-NEXT:    korw %k2, %k1, %k1
; KNL-NEXT:    kmovw %k1, 6(%rdi)
; KNL-NEXT:    kmovw %k0, 4(%rdi)
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
; KNL-NEXT:    kmovw %k0, 2(%rdi)
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
; KNL-NEXT:    kmovw %k0, (%rdi)
; KNL-NEXT:    retq
;
; SKX-LABEL: store_64i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpsllw $7, %zmm0, %zmm0
; SKX-NEXT:    vpmovb2m %zmm0, %k0
; SKX-NEXT:    kmovq %k0, (%rdi)
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: store_64i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpsllw $7, %zmm0, %zmm0
; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
; AVX512BW-NEXT:    kmovq %k0, (%rdi)
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: store_64i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    movw $-3, %ax
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kmovw %esi, %k0
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k1, %k2
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    kmovw %edx, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $14, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-5, %ax
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    kmovw %ecx, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $13, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-9, %ax
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    kmovw %r8d, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $12, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-17, %ax
; AVX512DQ-NEXT:    kmovw %eax, %k6
; AVX512DQ-NEXT:    kandw %k6, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    kmovw %r9d, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $11, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-33, %ax
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k1, %k3
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $10, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-65, %ax
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $9, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-129, %ax
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k1, %k4
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $8, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-257, %ax ## imm = 0xFEFF
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $7, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-513, %ax ## imm = 0xFDFF
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k1, %k5
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $6, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-1025, %ax ## imm = 0xFBFF
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $5, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-2049, %ax ## imm = 0xF7FF
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $4, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-4097, %ax ## imm = 0xEFFF
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $3, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-8193, %ax ## imm = 0xDFFF
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kshiftlw $15, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $2, %k1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    movw $-16385, %ax ## imm = 0xBFFF
; AVX512DQ-NEXT:    kmovw %eax, %k1
; AVX512DQ-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $14, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kshiftlw $1, %k0, %k0
; AVX512DQ-NEXT:    kshiftrw $1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k0
; AVX512DQ-NEXT:    kandw %k2, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $14, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $13, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k2, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $12, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kandw %k6, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $11, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kandw %k3, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $10, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k6, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $9, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kandw %k4, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $8, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k3, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $7, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kandw %k5, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $6, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k4, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $5, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k5, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $4, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k7, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $3, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k7, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $2, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k7, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $14, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kshiftlw $1, %k0, %k0
; AVX512DQ-NEXT:    kshiftrw $1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k7, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $14, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $13, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kandw %k2, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $12, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $11, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k2, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $10, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kandw %k6, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $9, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k2, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $8, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kandw %k3, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $7, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k3, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $6, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kandw %k4, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $5, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kandw %k5, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $4, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k2, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $3, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k5, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $2, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k5, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $14, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    kshiftlw $1, %k0, %k0
; AVX512DQ-NEXT:    kshiftrw $1, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k0, %k0
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k5, %k7, %k7
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k6
; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT:    kshiftrw $14, %k6, %k6
; AVX512DQ-NEXT:    korw %k6, %k7, %k6
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k5, %k6, %k6
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $13, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k6, %k6
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k5, %k6, %k6
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $12, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k6, %k6
; AVX512DQ-NEXT:    kandw %k1, %k6, %k6
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $11, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k6, %k6
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k1, %k6, %k6
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $10, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k6, %k6
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k1, %k6, %k6
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $9, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k6, %k6
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k1, %k6, %k6
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $8, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k6, %k6
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k1, %k6, %k6
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $7, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k6, %k6
; AVX512DQ-NEXT:    kandw %k3, %k6, %k6
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k7
; AVX512DQ-NEXT:    kshiftlw $15, %k7, %k7
; AVX512DQ-NEXT:    kshiftrw $6, %k7, %k7
; AVX512DQ-NEXT:    korw %k7, %k6, %k6
; AVX512DQ-NEXT:    kandw %k4, %k6, %k5
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k6
; AVX512DQ-NEXT:    kshiftlw $15, %k6, %k6
; AVX512DQ-NEXT:    kshiftrw $5, %k6, %k6
; AVX512DQ-NEXT:    korw %k6, %k5, %k5
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k1, %k5, %k4
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k5
; AVX512DQ-NEXT:    kshiftlw $15, %k5, %k5
; AVX512DQ-NEXT:    kshiftrw $4, %k5, %k5
; AVX512DQ-NEXT:    korw %k5, %k4, %k4
; AVX512DQ-NEXT:    kandw %k2, %k4, %k3
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k4
; AVX512DQ-NEXT:    kshiftlw $15, %k4, %k4
; AVX512DQ-NEXT:    kshiftrw $3, %k4, %k4
; AVX512DQ-NEXT:    korw %k4, %k3, %k3
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k1, %k3, %k2
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k3
; AVX512DQ-NEXT:    kshiftlw $15, %k3, %k3
; AVX512DQ-NEXT:    kshiftrw $2, %k3, %k3
; AVX512DQ-NEXT:    korw %k3, %k2, %k2
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; AVX512DQ-NEXT:    kandw %k1, %k2, %k1
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k2
; AVX512DQ-NEXT:    kshiftlw $14, %k2, %k2
; AVX512DQ-NEXT:    korw %k2, %k1, %k1
; AVX512DQ-NEXT:    kshiftlw $1, %k1, %k1
; AVX512DQ-NEXT:    kshiftrw $1, %k1, %k1
; AVX512DQ-NEXT:    movb {{[0-9]+}}(%rsp), %al
; AVX512DQ-NEXT:    kmovw %eax, %k2
; AVX512DQ-NEXT:    kshiftlw $15, %k2, %k2
; AVX512DQ-NEXT:    korw %k2, %k1, %k1
; AVX512DQ-NEXT:    kmovw %k1, 6(%rdi)
; AVX512DQ-NEXT:    kmovw %k0, 4(%rdi)
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
; AVX512DQ-NEXT:    kmovw %k0, 2(%rdi)
; AVX512DQ-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload
; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: store_64i1:
; X86:       ## %bb.0:
; X86-NEXT:    vpsllw $7, %zmm0, %zmm0
; X86-NEXT:    vpmovb2m %zmm0, %k0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovq %k0, (%eax)
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  store <64 x i1> %v, <64 x i1>* %a
  ret void
}

; Compares all 16 i32 lanes of %a against zero, keeps the low 8 mask bits,
; bitcasts them to i8, zero-extends to i32 and returns the doubled value.
; The expected assembly for the configurations that list +avx512dq reads the
; mask with kmovb and has no movzbl; the other two configurations follow the
; mask move with an explicit movzbl.
define i32 @test_bitcast_v8i1_zext(<16 x i32> %a) {
; KNL-LABEL: test_bitcast_v8i1_zext:
; KNL:       ## %bb.0:
; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    movzbl %al, %eax
; KNL-NEXT:    addl %eax, %eax
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
;
; SKX-LABEL: test_bitcast_v8i1_zext:
; SKX:       ## %bb.0:
; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; SKX-NEXT:    kmovb %k0, %eax
; SKX-NEXT:    addl %eax, %eax
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: test_bitcast_v8i1_zext:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    movzbl %al, %eax
; AVX512BW-NEXT:    addl %eax, %eax
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_bitcast_v8i1_zext:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT:    kmovb %k0, %eax
; AVX512DQ-NEXT:    addl %eax, %eax
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: test_bitcast_v8i1_zext:
; X86:       ## %bb.0:
; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; X86-NEXT:    kmovb %k0, %eax
; X86-NEXT:    addl %eax, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
   %v1 = icmp eq <16 x i32> %a, zeroinitializer
   ; Extract lanes 0..7 of the 16-bit compare result.
   %mask = shufflevector <16 x i1> %v1, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   %mask1 = bitcast <8 x i1> %mask to i8
   %val = zext i8 %mask1 to i32
   %val1 = add i32 %val, %val
   ret i32 %val1
}

; Compares all 16 i32 lanes of %a against zero, bitcasts the full <16 x i1>
; result to i16, zero-extends to i32 and returns the doubled value.
; The expected assembly has kmovw followed directly by addl, with no separate
; zero-extension instruction, in every configuration.
define i32 @test_bitcast_v16i1_zext(<16 x i32> %a) {
; CHECK-LABEL: test_bitcast_v16i1_zext:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    addl %eax, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
;
; X86-LABEL: test_bitcast_v16i1_zext:
; X86:       ## %bb.0:
; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; X86-NEXT:    kmovw %k0, %eax
; X86-NEXT:    addl %eax, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
   %v1 = icmp eq <16 x i32> %a, zeroinitializer
   %mask1 = bitcast <16 x i1> %v1 to i16
   %val = zext i16 %mask1 to i32
   %val1 = add i32 %val, %val
   ret i32 %val1
}

; Adds two <16 x i1> values that are bitcast from, and back to, i16.
; The expected assembly performs the operation as a scalar xor on
; general-purpose registers; no mask-register instructions are expected.
define i16 @test_v16i1_add(i16 %x, i16 %y) {
; CHECK-LABEL: test_v16i1_add:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    xorl %esi, %eax
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test_v16i1_add:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorw {{[0-9]+}}(%esp), %ax
; X86-NEXT:    retl
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = bitcast i16 %y to <16 x i1>
  %m2 = add <16 x i1> %m0,  %m1
  %ret = bitcast <16 x i1> %m2 to i16
  ret i16 %ret
}

; Subtracts two <16 x i1> values that are bitcast from, and back to, i16.
; The expected assembly performs the operation as a scalar xor on
; general-purpose registers; no mask-register instructions are expected.
define i16 @test_v16i1_sub(i16 %x, i16 %y) {
; CHECK-LABEL: test_v16i1_sub:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    xorl %esi, %eax
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test_v16i1_sub:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    xorw {{[0-9]+}}(%esp), %ax
; X86-NEXT:    retl
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = bitcast i16 %y to <16 x i1>
  %m2 = sub <16 x i1> %m0,  %m1
  %ret = bitcast <16 x i1> %m2 to i16
  ret i16 %ret
}

; Multiplies two <16 x i1> values that are bitcast from, and back to, i16.
; The expected assembly performs the operation as a scalar and on
; general-purpose registers; no mask-register instructions are expected.
define i16 @test_v16i1_mul(i16 %x, i16 %y) {
; CHECK-LABEL: test_v16i1_mul:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl %esi, %eax
; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test_v16i1_mul:
; X86:       ## %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
; X86-NEXT:    retl
  %m0 = bitcast i16 %x to <16 x i1>
  %m1 = bitcast i16 %y to <16 x i1>
  %m2 = mul <16 x i1> %m0,  %m1
  %ret = bitcast <16 x i1> %m2 to i16
  ret i16 %ret
}

; Adds two <8 x i1> values that are bitcast from, and back to, i8.
; The expected assembly performs the operation as a scalar xor on
; general-purpose registers; no mask-register instructions are expected.
define i8 @test_v8i1_add(i8 %x, i8 %y) {
; CHECK-LABEL: test_v8i1_add:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    xorl %esi, %eax
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test_v8i1_add:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    xorb {{[0-9]+}}(%esp), %al
; X86-NEXT:    retl
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = bitcast i8 %y to <8 x i1>
  %m2 = add <8 x i1> %m0,  %m1
  %ret = bitcast <8 x i1> %m2 to i8
  ret i8 %ret
}

; Subtracts two <8 x i1> values that are bitcast from, and back to, i8.
; The expected assembly performs the operation as a scalar xor on
; general-purpose registers; no mask-register instructions are expected.
define i8 @test_v8i1_sub(i8 %x, i8 %y) {
; CHECK-LABEL: test_v8i1_sub:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    xorl %esi, %eax
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test_v8i1_sub:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    xorb {{[0-9]+}}(%esp), %al
; X86-NEXT:    retl
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = bitcast i8 %y to <8 x i1>
  %m2 = sub <8 x i1> %m0,  %m1
  %ret = bitcast <8 x i1> %m2 to i8
  ret i8 %ret
}

; Multiplies two <8 x i1> values that are bitcast from, and back to, i8.
; The expected assembly performs the operation as a scalar and on
; general-purpose registers; no mask-register instructions are expected.
define i8 @test_v8i1_mul(i8 %x, i8 %y) {
; CHECK-LABEL: test_v8i1_mul:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl %esi, %eax
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test_v8i1_mul:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
; X86-NEXT:    retl
  %m0 = bitcast i8 %x to <8 x i1>
  %m1 = bitcast i8 %y to <8 x i1>
  %m2 = mul <8 x i1> %m0,  %m1
  %ret = bitcast <8 x i1> %m2 to i8
  ret i8 %ret
}

; Make sure we don't emit a ktest for signed comparisons.
; The two zero-compare masks are and-ed, bitcast to i16 and compared with
; "icmp sgt 0". The expected assembly moves the mask into a general-purpose
; register and branches with testw + jle in every configuration.
define void @ktest_signed(<16 x i32> %x, <16 x i32> %y) {
; KNL-LABEL: ktest_signed:
; KNL:       ## %bb.0:
; KNL-NEXT:    vpord %zmm1, %zmm0, %zmm0
; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testw %ax, %ax
; KNL-NEXT:    jle LBB66_1
; KNL-NEXT:  ## %bb.2: ## %bb.2
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
; KNL-NEXT:  LBB66_1: ## %bb.1
; KNL-NEXT:    pushq %rax
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    callq _foo
; KNL-NEXT:    addq $8, %rsp
; KNL-NEXT:    retq
;
; SKX-LABEL: ktest_signed:
; SKX:       ## %bb.0:
; SKX-NEXT:    vpord %zmm1, %zmm0, %zmm0
; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    testw %ax, %ax
; SKX-NEXT:    jle LBB66_1
; SKX-NEXT:  ## %bb.2: ## %bb.2
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
; SKX-NEXT:  LBB66_1: ## %bb.1
; SKX-NEXT:    pushq %rax
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    callq _foo
; SKX-NEXT:    addq $8, %rsp
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: ktest_signed:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vpord %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    testw %ax, %ax
; AVX512BW-NEXT:    jle LBB66_1
; AVX512BW-NEXT:  ## %bb.2: ## %bb.2
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
; AVX512BW-NEXT:  LBB66_1: ## %bb.1
; AVX512BW-NEXT:    pushq %rax
; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    callq _foo
; AVX512BW-NEXT:    addq $8, %rsp
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: ktest_signed:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vpord %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    testw %ax, %ax
; AVX512DQ-NEXT:    jle LBB66_1
; AVX512DQ-NEXT:  ## %bb.2: ## %bb.2
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
; AVX512DQ-NEXT:  LBB66_1: ## %bb.1
; AVX512DQ-NEXT:    pushq %rax
; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    callq _foo
; AVX512DQ-NEXT:    addq $8, %rsp
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: ktest_signed:
; X86:       ## %bb.0:
; X86-NEXT:    vpord %zmm1, %zmm0, %zmm0
; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    testw %ax, %ax
; X86-NEXT:    jle LBB66_1
; X86-NEXT:  ## %bb.2: ## %bb.2
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
; X86-NEXT:  LBB66_1: ## %bb.1
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    vzeroupper
; X86-NEXT:    calll _foo
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    retl
  %a = icmp eq <16 x i32> %x, zeroinitializer
  %b = icmp eq <16 x i32> %y, zeroinitializer
  %c = and <16 x i1> %a, %b
  %d = bitcast <16 x i1> %c to i16
  ; Signed compare of the mask value: this is the condition under test.
  %e = icmp sgt i16 %d, 0
  br i1 %e, label %bb.2, label %bb.1
bb.1:
  call void @foo()
  br label %bb.2
bb.2:
  ret void
}
; External function called from the conditionally executed blocks of the
; ktest_* tests in this file.
declare void @foo()

; Make sure we can use the C flag from kortest to check for all ones.
; The two zero-compare masks are and-ed, bitcast to i16 and compared for
; equality with -1. The expected assembly branches with jb directly after
; kortestw, without moving the mask to a general-purpose register.
define void @ktest_allones(<16 x i32> %x, <16 x i32> %y) {
; CHECK-LABEL: ktest_allones:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpord %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; CHECK-NEXT:    kortestw %k0, %k0
; CHECK-NEXT:    jb LBB67_2
; CHECK-NEXT:  ## %bb.1: ## %bb.1
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    callq _foo
; CHECK-NEXT:    addq $8, %rsp
; CHECK-NEXT:  LBB67_2: ## %bb.2
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
;
; X86-LABEL: ktest_allones:
; X86:       ## %bb.0:
; X86-NEXT:    vpord %zmm1, %zmm0, %zmm0
; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; X86-NEXT:    kortestw %k0, %k0
; X86-NEXT:    jb LBB67_2
; X86-NEXT:  ## %bb.1: ## %bb.1
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    vzeroupper
; X86-NEXT:    calll _foo
; X86-NEXT:    addl $12, %esp
; X86-NEXT:  LBB67_2: ## %bb.2
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  %a = icmp eq <16 x i32> %x, zeroinitializer
  %b = icmp eq <16 x i32> %y, zeroinitializer
  %c = and <16 x i1> %a, %b
  %d = bitcast <16 x i1> %c to i16
  ; All-ones test on the 16-bit mask: this is the condition under test.
  %e = icmp eq i16 %d, -1
  br i1 %e, label %bb.2, label %bb.1
bb.1:
  call void @foo()
  br label %bb.2
bb.2:
  ret void
}

; This is derived from an intrinsic test where a v4i1 mask was created by _mm_cmp_epi32_mask, then it was passed to _mm512_mask_blend_epi32, which uses a v16i1 mask.
; The widening happens in the scalar domain between the intrinsics. The middle end optimized it to this.
; Arguments %c and %d are not referenced by the body.
define <8 x i64> @mask_widening(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d, <8 x i64> %e, <8 x i64> %f) {
; KNL-LABEL: mask_widening:
; KNL:       ## %bb.0: ## %entry
; KNL-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; KNL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; KNL-NEXT:    kshiftlw $12, %k0, %k0
; KNL-NEXT:    kshiftrw $12, %k0, %k1
; KNL-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; KNL-NEXT:    retq
;
; SKX-LABEL: mask_widening:
; SKX:       ## %bb.0: ## %entry
; SKX-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
; SKX-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: mask_widening:
; AVX512BW:       ## %bb.0: ## %entry
; AVX512BW-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    kshiftlw $12, %k0, %k0
; AVX512BW-NEXT:    kshiftrw $12, %k0, %k1
; AVX512BW-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: mask_widening:
; AVX512DQ:       ## %bb.0: ## %entry
; AVX512DQ-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; AVX512DQ-NEXT:    kshiftlw $12, %k0, %k0
; AVX512DQ-NEXT:    kshiftrw $12, %k0, %k1
; AVX512DQ-NEXT:    vpblendmd %zmm5, %zmm4, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: mask_widening:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    .cfi_def_cfa_offset 8
; X86-NEXT:    .cfi_offset %ebp, -8
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    .cfi_def_cfa_register %ebp
; X86-NEXT:    andl $-64, %esp
; X86-NEXT:    subl $64, %esp
; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %k1
; X86-NEXT:    vmovdqa64 8(%ebp), %zmm0
; X86-NEXT:    vmovdqa32 72(%ebp), %zmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
entry:
  %0 = bitcast <2 x i64> %a to <4 x i32>
  %1 = bitcast <2 x i64> %b to <4 x i32>
  %2 = icmp eq <4 x i32> %0, %1
  ; Widen the 4-lane compare result to 8 lanes; lanes 4..7 come from the
  ; zeroinitializer operand.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i64> %f to <16 x i32>
  %5 = bitcast <8 x i64> %e to <16 x i32>
  ; Widen to 16 lanes; index 8 selects element 0 of the second operand, which
  ; is false, so lanes 8..15 are all false.
  %6 = shufflevector <8 x i1> %3, <8 x i1> <i1 false, i1 undef, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  ; Lanes with a set mask bit take %f, the rest take %e.
  %7 = select <16 x i1> %6, <16 x i32> %4, <16 x i32> %5
  %8 = bitcast <16 x i32> %7 to <8 x i64>
  ret <8 x i64> %8
}

; Stores a constant <128 x i1> through %R.
; The expected assembly for the 64-bit run lines materializes the value as two
; 64-bit immediates stored at offsets 8 and 0; the 32-bit run line expects a
; single 16-byte vector constant load and store.
define void @store_v128i1_constant(<128 x i1>* %R) {
; CHECK-LABEL: store_v128i1_constant:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movabsq $-4611686310485172227, %rax ## imm = 0xBFFFFFBBFFFFDFFD
; CHECK-NEXT:    movq %rax, 8(%rdi)
; CHECK-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
; CHECK-NEXT:    movq %rax, (%rdi)
; CHECK-NEXT:    retq
;
; X86-LABEL: store_v128i1_constant:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovaps {{.*#+}} xmm0 = [4294963197,3758096251,4294959101,3221225403]
; X86-NEXT:    vmovaps %xmm0, (%eax)
; X86-NEXT:    retl
entry:
  store <128 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1>, <128 x i1>* %R
  ret void
}

; Stores a constant <64 x i1> through %R.
; The expected assembly for the 64-bit run lines is one 64-bit immediate store;
; the 32-bit run line expects two 32-bit immediate stores at offsets 4 and 0.
define void @store_v64i1_constant(<64 x i1>* %R) {
; CHECK-LABEL: store_v64i1_constant:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movabsq $-2305843576149381123, %rax ## imm = 0xDFFFFF7BFFFFEFFD
; CHECK-NEXT:    movq %rax, (%rdi)
; CHECK-NEXT:    retq
;
; X86-LABEL: store_v64i1_constant:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl $-536871045, 4(%eax) ## imm = 0xDFFFFF7B
; X86-NEXT:    movl $-4099, (%eax) ## imm = 0xEFFD
; X86-NEXT:    retl
entry:
  store <64 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <64 x i1>* %R
  ret void
}

; Stores the constant <2 x i1> <1, 0> through %R.
; The expected assembly is a single byte store of the immediate 1.
define void @store_v2i1_constant(<2 x i1>* %R) {
; CHECK-LABEL: store_v2i1_constant:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movb $1, (%rdi)
; CHECK-NEXT:    retq
;
; X86-LABEL: store_v2i1_constant:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb $1, (%eax)
; X86-NEXT:    retl
entry:
  store <2 x i1> <i1 1, i1 0>, <2 x i1>* %R
  ret void
}

; Stores the constant <4 x i1> <1, 0, 1, 0> through %R.
; The expected assembly is a single byte store of the immediate 5.
define void @store_v4i1_constant(<4 x i1>* %R) {
; CHECK-LABEL: store_v4i1_constant:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    movb $5, (%rdi)
; CHECK-NEXT:    retq
;
; X86-LABEL: store_v4i1_constant:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb $5, (%eax)
; X86-NEXT:    retl
entry:
  store <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x i1>* %R
  ret void
}

; Make sure we bring the -1 constant into the mask domain.
; The IR inverts a compare mask with a scalar "xor i16 ..., -1" and ands it
; with a second mask before using it for a masked store. The expected assembly
; contains no scalar xor or and: the inverted compare appears as vpcmpnleud,
; and its result is used as the write mask of vptestmd.
; Arguments are unnamed, so they are numbered %0 (pointer) through %4.
define void @mask_not_cast(i8*, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>) {
; CHECK-LABEL: mask_not_cast:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpcmpnleud %zmm3, %zmm2, %k1
; CHECK-NEXT:    vptestmd %zmm0, %zmm1, %k1 {%k1}
; CHECK-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
;
; X86-LABEL: mask_not_cast:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcmpnleud %zmm3, %zmm2, %k1
; X86-NEXT:    vptestmd %zmm0, %zmm1, %k1 {%k1}
; X86-NEXT:    vmovdqu32 %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
  ; First mask: lanes where (%2 & %1) is non-zero.
  %6 = and <8 x i64> %2, %1
  %7 = bitcast <8 x i64> %6 to <16 x i32>
  %8 = icmp ne <16 x i32> %7, zeroinitializer
  %9 = bitcast <16 x i1> %8 to i16
  ; Second mask: unsigned %3 <= %4, inverted below in the scalar domain.
  %10 = bitcast <8 x i64> %3 to <16 x i32>
  %11 = bitcast <8 x i64> %4 to <16 x i32>
  %12 = icmp ule <16 x i32> %10, %11
  %13 = bitcast <16 x i1> %12 to i16
  %14 = xor i16 %13, -1
  %15 = and i16 %14, %9
  %16 = bitcast <8 x i64> %1 to <16 x i32>
  %17 = bitcast i8* %0 to <16 x i32>*
  %18 = bitcast i16 %15 to <16 x i1>
  ; Store %1 (viewed as 16 x i32) through %0 under the combined mask.
  tail call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> %16, <16 x i32>* %17, i32 1, <16 x i1> %18) #2
  ret void
}
declare void @llvm.masked.store.v16i32.p0v16i32(<16 x i32>, <16 x i32>*, i32, <16 x i1>)

; Computes (w == 0 | x == 0) & (y == 0 | z == 0) on <8 x i32> inputs, bitcasts
; the <8 x i1> result to i8 and calls @foo when it is zero.
; The expected assembly for the configurations that list +avx512dq feeds the
; two or-ed masks straight into ktestb; the other two configurations and the
; masks with kandw, move the result to a general-purpose register and use
; testb.
define void @ktest_3(<8 x i32> %w, <8 x i32> %x, <8 x i32> %y, <8 x i32> %z) {
; KNL-LABEL: ktest_3:
; KNL:       ## %bb.0:
; KNL-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
; KNL-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
; KNL-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; KNL-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; KNL-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; KNL-NEXT:    vptestnmd %zmm2, %zmm2, %k2
; KNL-NEXT:    vptestnmd %zmm3, %zmm3, %k3
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    korw %k3, %k2, %k1
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    je LBB74_1
; KNL-NEXT:  ## %bb.2: ## %exit
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
; KNL-NEXT:  LBB74_1: ## %bar
; KNL-NEXT:    pushq %rax
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    callq _foo
; KNL-NEXT:    addq $8, %rsp
; KNL-NEXT:    retq
;
; SKX-LABEL: ktest_3:
; SKX:       ## %bb.0:
; SKX-NEXT:    vptestnmd %ymm0, %ymm0, %k0
; SKX-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; SKX-NEXT:    vptestnmd %ymm3, %ymm3, %k2
; SKX-NEXT:    korb %k2, %k1, %k1
; SKX-NEXT:    ktestb %k1, %k0
; SKX-NEXT:    je LBB74_1
; SKX-NEXT:  ## %bb.2: ## %exit
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
; SKX-NEXT:  LBB74_1: ## %bar
; SKX-NEXT:    pushq %rax
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    callq _foo
; SKX-NEXT:    addq $8, %rsp
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: ktest_3:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512BW-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512BW-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; AVX512BW-NEXT:    vptestnmd %zmm2, %zmm2, %k2
; AVX512BW-NEXT:    vptestnmd %zmm3, %zmm3, %k3
; AVX512BW-NEXT:    korw %k1, %k0, %k0
; AVX512BW-NEXT:    korw %k3, %k2, %k1
; AVX512BW-NEXT:    kandw %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    testb %al, %al
; AVX512BW-NEXT:    je LBB74_1
; AVX512BW-NEXT:  ## %bb.2: ## %exit
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
; AVX512BW-NEXT:  LBB74_1: ## %bar
; AVX512BW-NEXT:    pushq %rax
; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    callq _foo
; AVX512BW-NEXT:    addq $8, %rsp
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: ktest_3:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    ## kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT:    ## kill: def $ymm2 killed $ymm2 def $zmm2
; AVX512DQ-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512DQ-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; AVX512DQ-NEXT:    vptestnmd %zmm2, %zmm2, %k2
; AVX512DQ-NEXT:    vptestnmd %zmm3, %zmm3, %k3
; AVX512DQ-NEXT:    korb %k1, %k0, %k0
; AVX512DQ-NEXT:    korb %k3, %k2, %k1
; AVX512DQ-NEXT:    ktestb %k1, %k0
; AVX512DQ-NEXT:    je LBB74_1
; AVX512DQ-NEXT:  ## %bb.2: ## %exit
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
; AVX512DQ-NEXT:  LBB74_1: ## %bar
; AVX512DQ-NEXT:    pushq %rax
; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    callq _foo
; AVX512DQ-NEXT:    addq $8, %rsp
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: ktest_3:
; X86:       ## %bb.0:
; X86-NEXT:    vptestnmd %ymm0, %ymm0, %k0
; X86-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; X86-NEXT:    korb %k1, %k0, %k0
; X86-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; X86-NEXT:    vptestnmd %ymm3, %ymm3, %k2
; X86-NEXT:    korb %k2, %k1, %k1
; X86-NEXT:    ktestb %k1, %k0
; X86-NEXT:    je LBB74_1
; X86-NEXT:  ## %bb.2: ## %exit
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
; X86-NEXT:  LBB74_1: ## %bar
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    vzeroupper
; X86-NEXT:    calll _foo
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    retl
  %a = icmp eq <8 x i32> %w, zeroinitializer
  %b = icmp eq <8 x i32> %x, zeroinitializer
  %c = icmp eq <8 x i32> %y, zeroinitializer
  %d = icmp eq <8 x i32> %z, zeroinitializer
  %e = or <8 x i1> %a, %b
  %f = or <8 x i1> %c, %d
  ; The and feeding a compare-with-zero is the pattern under test.
  %g = and <8 x i1> %e, %f
  %h = bitcast <8 x i1> %g to i8
  %i = icmp eq i8 %h, 0
  br i1 %i, label %bar, label %exit

bar:
  call void @foo()
  br label %exit

exit:
  ret void
}

; Computes (w == 0 | x == 0) & (y == 0 | z == 0) on <8 x i64> inputs, bitcasts
; the <8 x i1> result to i8 and calls @foo when it is zero.
; The expected assembly for the configurations that list +avx512dq feeds the
; two or-ed masks straight into ktestb; the other two configurations and the
; masks with kandw, move the result to a general-purpose register and use
; testb.
define void @ktest_4(<8 x i64> %w, <8 x i64> %x, <8 x i64> %y, <8 x i64> %z) {
; KNL-LABEL: ktest_4:
; KNL:       ## %bb.0:
; KNL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
; KNL-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; KNL-NEXT:    vptestnmq %zmm2, %zmm2, %k2
; KNL-NEXT:    vptestnmq %zmm3, %zmm3, %k3
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    korw %k3, %k2, %k1
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    testb %al, %al
; KNL-NEXT:    je LBB75_1
; KNL-NEXT:  ## %bb.2: ## %exit
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
; KNL-NEXT:  LBB75_1: ## %bar
; KNL-NEXT:    pushq %rax
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    callq _foo
; KNL-NEXT:    addq $8, %rsp
; KNL-NEXT:    retq
;
; SKX-LABEL: ktest_4:
; SKX:       ## %bb.0:
; SKX-NEXT:    vptestnmq %zmm0, %zmm0, %k0
; SKX-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    vptestnmq %zmm2, %zmm2, %k1
; SKX-NEXT:    vptestnmq %zmm3, %zmm3, %k2
; SKX-NEXT:    korb %k2, %k1, %k1
; SKX-NEXT:    ktestb %k1, %k0
; SKX-NEXT:    je LBB75_1
; SKX-NEXT:  ## %bb.2: ## %exit
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
; SKX-NEXT:  LBB75_1: ## %bar
; SKX-NEXT:    pushq %rax
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    callq _foo
; SKX-NEXT:    addq $8, %rsp
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: ktest_4:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; AVX512BW-NEXT:    vptestnmq %zmm2, %zmm2, %k2
; AVX512BW-NEXT:    vptestnmq %zmm3, %zmm3, %k3
; AVX512BW-NEXT:    korw %k1, %k0, %k0
; AVX512BW-NEXT:    korw %k3, %k2, %k1
; AVX512BW-NEXT:    kandw %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    testb %al, %al
; AVX512BW-NEXT:    je LBB75_1
; AVX512BW-NEXT:  ## %bb.2: ## %exit
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
; AVX512BW-NEXT:  LBB75_1: ## %bar
; AVX512BW-NEXT:    pushq %rax
; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    callq _foo
; AVX512BW-NEXT:    addq $8, %rsp
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: ktest_4:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vptestnmq %zmm0, %zmm0, %k0
; AVX512DQ-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; AVX512DQ-NEXT:    korb %k1, %k0, %k0
; AVX512DQ-NEXT:    vptestnmq %zmm2, %zmm2, %k1
; AVX512DQ-NEXT:    vptestnmq %zmm3, %zmm3, %k2
; AVX512DQ-NEXT:    korb %k2, %k1, %k1
; AVX512DQ-NEXT:    ktestb %k1, %k0
; AVX512DQ-NEXT:    je LBB75_1
; AVX512DQ-NEXT:  ## %bb.2: ## %exit
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
; AVX512DQ-NEXT:  LBB75_1: ## %bar
; AVX512DQ-NEXT:    pushq %rax
; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    callq _foo
; AVX512DQ-NEXT:    addq $8, %rsp
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: ktest_4:
; X86:       ## %bb.0:
; X86-NEXT:    vptestnmq %zmm0, %zmm0, %k0
; X86-NEXT:    vptestnmq %zmm1, %zmm1, %k1
; X86-NEXT:    korb %k1, %k0, %k0
; X86-NEXT:    vptestnmq %zmm2, %zmm2, %k1
; X86-NEXT:    vptestnmq %zmm3, %zmm3, %k2
; X86-NEXT:    korb %k2, %k1, %k1
; X86-NEXT:    ktestb %k1, %k0
; X86-NEXT:    je LBB75_1
; X86-NEXT:  ## %bb.2: ## %exit
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
; X86-NEXT:  LBB75_1: ## %bar
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    vzeroupper
; X86-NEXT:    calll _foo
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    retl
  %a = icmp eq <8 x i64> %w, zeroinitializer
  %b = icmp eq <8 x i64> %x, zeroinitializer
  %c = icmp eq <8 x i64> %y, zeroinitializer
  %d = icmp eq <8 x i64> %z, zeroinitializer
  %e = or <8 x i1> %a, %b
  %f = or <8 x i1> %c, %d
  ; The and feeding a compare-with-zero is the pattern under test.
  %g = and <8 x i1> %e, %f
  %h = bitcast <8 x i1> %g to i8
  %i = icmp eq i8 %h, 0
  br i1 %i, label %bar, label %exit

bar:
  call void @foo()
  br label %exit

exit:
  ret void
}

; ktest_5: compares four <16 x i32> inputs against zero, ORs the resulting
; masks pairwise, ANDs the two ORs, and calls @foo when the combined 16-bit
; mask is zero. The runs that enable avx512dq expect the AND plus the zero
; test to be emitted as a single ktestw; the remaining runs expect a kandw
; followed by kortestw.
define void @ktest_5(<16 x i32> %w, <16 x i32> %x, <16 x i32> %y, <16 x i32> %z) {
; KNL-LABEL: ktest_5:
; KNL:       ## %bb.0:
; KNL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; KNL-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; KNL-NEXT:    vptestnmd %zmm3, %zmm3, %k2
; KNL-NEXT:    korw %k2, %k1, %k1
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kortestw %k0, %k0
; KNL-NEXT:    je LBB76_1
; KNL-NEXT:  ## %bb.2: ## %exit
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
; KNL-NEXT:  LBB76_1: ## %bar
; KNL-NEXT:    pushq %rax
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    callq _foo
; KNL-NEXT:    addq $8, %rsp
; KNL-NEXT:    retq
;
; SKX-LABEL: ktest_5:
; SKX:       ## %bb.0:
; SKX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; SKX-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; SKX-NEXT:    korw %k1, %k0, %k0
; SKX-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; SKX-NEXT:    vptestnmd %zmm3, %zmm3, %k2
; SKX-NEXT:    korw %k2, %k1, %k1
; SKX-NEXT:    ktestw %k1, %k0
; SKX-NEXT:    je LBB76_1
; SKX-NEXT:  ## %bb.2: ## %exit
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
; SKX-NEXT:  LBB76_1: ## %bar
; SKX-NEXT:    pushq %rax
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    callq _foo
; SKX-NEXT:    addq $8, %rsp
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: ktest_5:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; AVX512BW-NEXT:    korw %k1, %k0, %k0
; AVX512BW-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; AVX512BW-NEXT:    vptestnmd %zmm3, %zmm3, %k2
; AVX512BW-NEXT:    korw %k2, %k1, %k1
; AVX512BW-NEXT:    kandw %k1, %k0, %k0
; AVX512BW-NEXT:    kortestw %k0, %k0
; AVX512BW-NEXT:    je LBB76_1
; AVX512BW-NEXT:  ## %bb.2: ## %exit
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
; AVX512BW-NEXT:  LBB76_1: ## %bar
; AVX512BW-NEXT:    pushq %rax
; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    callq _foo
; AVX512BW-NEXT:    addq $8, %rsp
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: ktest_5:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; AVX512DQ-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; AVX512DQ-NEXT:    korw %k1, %k0, %k0
; AVX512DQ-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; AVX512DQ-NEXT:    vptestnmd %zmm3, %zmm3, %k2
; AVX512DQ-NEXT:    korw %k2, %k1, %k1
; AVX512DQ-NEXT:    ktestw %k1, %k0
; AVX512DQ-NEXT:    je LBB76_1
; AVX512DQ-NEXT:  ## %bb.2: ## %exit
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
; AVX512DQ-NEXT:  LBB76_1: ## %bar
; AVX512DQ-NEXT:    pushq %rax
; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    callq _foo
; AVX512DQ-NEXT:    addq $8, %rsp
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: ktest_5:
; X86:       ## %bb.0:
; X86-NEXT:    vptestnmd %zmm0, %zmm0, %k0
; X86-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; X86-NEXT:    korw %k1, %k0, %k0
; X86-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; X86-NEXT:    vptestnmd %zmm3, %zmm3, %k2
; X86-NEXT:    korw %k2, %k1, %k1
; X86-NEXT:    ktestw %k1, %k0
; X86-NEXT:    je LBB76_1
; X86-NEXT:  ## %bb.2: ## %exit
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
; X86-NEXT:  LBB76_1: ## %bar
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    vzeroupper
; X86-NEXT:    calll _foo
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    retl
  ; One 16-lane "equals zero" mask per input vector.
  %a = icmp eq <16 x i32> %w, zeroinitializer
  %b = icmp eq <16 x i32> %x, zeroinitializer
  %c = icmp eq <16 x i32> %y, zeroinitializer
  %d = icmp eq <16 x i32> %z, zeroinitializer
  %e = or <16 x i1> %a, %b
  %f = or <16 x i1> %c, %d
  ; The AND of the two OR results feeds directly into the compare with zero.
  %g = and <16 x i1> %e, %f
  %h = bitcast <16 x i1> %g to i16
  ; Take the %bar path (which calls @foo) only when no lane of %g is set.
  %i = icmp eq i16 %h, 0
  br i1 %i, label %bar, label %exit

bar:
  call void @foo()
  br label %exit

exit:
  ret void
}

; ktest_6: same and-of-ors-then-test-for-zero pattern as the other ktest
; functions, on <32 x i16> inputs and a 32-bit mask. The runs that enable
; avx512bw expect vptestnmw / kord / ktestd. The two 64-bit runs without
; avx512bw expect the compares to be done with vpcmpeqw on 256-bit halves,
; with the combined result reduced to a 16-bit mask and tested via kortestw.
define void @ktest_6(<32 x i16> %w, <32 x i16> %x, <32 x i16> %y, <32 x i16> %z) {
; KNL-LABEL: ktest_6:
; KNL:       ## %bb.0:
; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
; KNL-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; KNL-NEXT:    vpcmpeqw %ymm5, %ymm4, %ymm4
; KNL-NEXT:    vpcmpeqw %ymm5, %ymm0, %ymm0
; KNL-NEXT:    vinserti64x4 $1, %ymm4, %zmm0, %zmm0
; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; KNL-NEXT:    vpcmpeqw %ymm5, %ymm4, %ymm4
; KNL-NEXT:    vpcmpeqw %ymm5, %ymm1, %ymm1
; KNL-NEXT:    vinserti64x4 $1, %ymm4, %zmm1, %zmm1
; KNL-NEXT:    vporq %zmm1, %zmm0, %zmm0
; KNL-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
; KNL-NEXT:    vpcmpeqw %ymm5, %ymm1, %ymm1
; KNL-NEXT:    vpcmpeqw %ymm5, %ymm2, %ymm2
; KNL-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; KNL-NEXT:    vextracti64x4 $1, %zmm3, %ymm2
; KNL-NEXT:    vpcmpeqw %ymm5, %ymm2, %ymm2
; KNL-NEXT:    vpcmpeqw %ymm5, %ymm3, %ymm3
; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; KNL-NEXT:    vpternlogq $200, %zmm1, %zmm0, %zmm2
; KNL-NEXT:    vextracti64x4 $1, %zmm2, %ymm0
; KNL-NEXT:    vpor %ymm0, %ymm2, %ymm0
; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-NEXT:    kortestw %k0, %k0
; KNL-NEXT:    je LBB77_1
; KNL-NEXT:  ## %bb.2: ## %exit
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
; KNL-NEXT:  LBB77_1: ## %bar
; KNL-NEXT:    pushq %rax
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    callq _foo
; KNL-NEXT:    addq $8, %rsp
; KNL-NEXT:    retq
;
; SKX-LABEL: ktest_6:
; SKX:       ## %bb.0:
; SKX-NEXT:    vptestnmw %zmm0, %zmm0, %k0
; SKX-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; SKX-NEXT:    vptestnmw %zmm3, %zmm3, %k2
; SKX-NEXT:    kord %k2, %k1, %k1
; SKX-NEXT:    ktestd %k1, %k0
; SKX-NEXT:    je LBB77_1
; SKX-NEXT:  ## %bb.2: ## %exit
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
; SKX-NEXT:  LBB77_1: ## %bar
; SKX-NEXT:    pushq %rax
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    callq _foo
; SKX-NEXT:    addq $8, %rsp
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: ktest_6:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; AVX512BW-NEXT:    kord %k1, %k0, %k0
; AVX512BW-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; AVX512BW-NEXT:    vptestnmw %zmm3, %zmm3, %k2
; AVX512BW-NEXT:    kord %k2, %k1, %k1
; AVX512BW-NEXT:    ktestd %k1, %k0
; AVX512BW-NEXT:    je LBB77_1
; AVX512BW-NEXT:  ## %bb.2: ## %exit
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
; AVX512BW-NEXT:  LBB77_1: ## %bar
; AVX512BW-NEXT:    pushq %rax
; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    callq _foo
; AVX512BW-NEXT:    addq $8, %rsp
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: ktest_6:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX512DQ-NEXT:    vpcmpeqw %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpeqw %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm4, %zmm0, %zmm0
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; AVX512DQ-NEXT:    vpcmpeqw %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpeqw %ymm5, %ymm1, %ymm1
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm4, %zmm1, %zmm1
; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
; AVX512DQ-NEXT:    vpcmpeqw %ymm5, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpcmpeqw %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm3, %ymm2
; AVX512DQ-NEXT:    vpcmpeqw %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpcmpeqw %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512DQ-NEXT:    vpternlogq $200, %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm0
; AVX512DQ-NEXT:    vpor %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT:    vpmovsxwd %ymm0, %zmm0
; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    kortestw %k0, %k0
; AVX512DQ-NEXT:    je LBB77_1
; AVX512DQ-NEXT:  ## %bb.2: ## %exit
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
; AVX512DQ-NEXT:  LBB77_1: ## %bar
; AVX512DQ-NEXT:    pushq %rax
; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    callq _foo
; AVX512DQ-NEXT:    addq $8, %rsp
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: ktest_6:
; X86:       ## %bb.0:
; X86-NEXT:    vptestnmw %zmm0, %zmm0, %k0
; X86-NEXT:    vptestnmw %zmm1, %zmm1, %k1
; X86-NEXT:    kord %k1, %k0, %k0
; X86-NEXT:    vptestnmw %zmm2, %zmm2, %k1
; X86-NEXT:    vptestnmw %zmm3, %zmm3, %k2
; X86-NEXT:    kord %k2, %k1, %k1
; X86-NEXT:    ktestd %k1, %k0
; X86-NEXT:    je LBB77_1
; X86-NEXT:  ## %bb.2: ## %exit
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
; X86-NEXT:  LBB77_1: ## %bar
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    vzeroupper
; X86-NEXT:    calll _foo
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    retl
  ; One 32-lane "equals zero" mask per input vector.
  %a = icmp eq <32 x i16> %w, zeroinitializer
  %b = icmp eq <32 x i16> %x, zeroinitializer
  %c = icmp eq <32 x i16> %y, zeroinitializer
  %d = icmp eq <32 x i16> %z, zeroinitializer
  %e = or <32 x i1> %a, %b
  %f = or <32 x i1> %c, %d
  ; The AND of the two OR results feeds directly into the compare with zero.
  %g = and <32 x i1> %e, %f
  %h = bitcast <32 x i1> %g to i32
  ; Take the %bar path (which calls @foo) only when no lane of %g is set.
  %i = icmp eq i32 %h, 0
  br i1 %i, label %bar, label %exit

bar:
  call void @foo()
  br label %exit

exit:
  ret void
}

; ktest_7: same and-of-ors-then-test-for-zero pattern as the other ktest
; functions, on <64 x i8> inputs and a 64-bit mask. The two 64-bit runs that
; enable avx512bw expect vptestnmb / korq / ktestq. The i686 run expects an
; explicit kandq, then the upper half shifted down with kshiftrq $32 and the
; two halves tested together with kortestd. The two runs without avx512bw
; expect vpcmpeqb on 256-bit halves, finished with vpmovmskb and testl.
define void @ktest_7(<64 x i8> %w, <64 x i8> %x, <64 x i8> %y, <64 x i8> %z) {
; KNL-LABEL: ktest_7:
; KNL:       ## %bb.0:
; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
; KNL-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; KNL-NEXT:    vpcmpeqb %ymm5, %ymm4, %ymm4
; KNL-NEXT:    vpcmpeqb %ymm5, %ymm0, %ymm0
; KNL-NEXT:    vinserti64x4 $1, %ymm4, %zmm0, %zmm0
; KNL-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; KNL-NEXT:    vpcmpeqb %ymm5, %ymm4, %ymm4
; KNL-NEXT:    vpcmpeqb %ymm5, %ymm1, %ymm1
; KNL-NEXT:    vinserti64x4 $1, %ymm4, %zmm1, %zmm1
; KNL-NEXT:    vporq %zmm1, %zmm0, %zmm0
; KNL-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
; KNL-NEXT:    vpcmpeqb %ymm5, %ymm1, %ymm1
; KNL-NEXT:    vpcmpeqb %ymm5, %ymm2, %ymm2
; KNL-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; KNL-NEXT:    vextracti64x4 $1, %zmm3, %ymm2
; KNL-NEXT:    vpcmpeqb %ymm5, %ymm2, %ymm2
; KNL-NEXT:    vpcmpeqb %ymm5, %ymm3, %ymm3
; KNL-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; KNL-NEXT:    vpternlogq $200, %zmm1, %zmm0, %zmm2
; KNL-NEXT:    vextracti64x4 $1, %zmm2, %ymm0
; KNL-NEXT:    vpor %ymm0, %ymm2, %ymm0
; KNL-NEXT:    vpmovmskb %ymm0, %eax
; KNL-NEXT:    testl %eax, %eax
; KNL-NEXT:    je LBB78_1
; KNL-NEXT:  ## %bb.2: ## %exit
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    retq
; KNL-NEXT:  LBB78_1: ## %bar
; KNL-NEXT:    pushq %rax
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    vzeroupper
; KNL-NEXT:    callq _foo
; KNL-NEXT:    addq $8, %rsp
; KNL-NEXT:    retq
;
; SKX-LABEL: ktest_7:
; SKX:       ## %bb.0:
; SKX-NEXT:    vptestnmb %zmm0, %zmm0, %k0
; SKX-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; SKX-NEXT:    korq %k1, %k0, %k0
; SKX-NEXT:    vptestnmb %zmm2, %zmm2, %k1
; SKX-NEXT:    vptestnmb %zmm3, %zmm3, %k2
; SKX-NEXT:    korq %k2, %k1, %k1
; SKX-NEXT:    ktestq %k1, %k0
; SKX-NEXT:    je LBB78_1
; SKX-NEXT:  ## %bb.2: ## %exit
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
; SKX-NEXT:  LBB78_1: ## %bar
; SKX-NEXT:    pushq %rax
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    callq _foo
; SKX-NEXT:    addq $8, %rsp
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: ktest_7:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
; AVX512BW-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; AVX512BW-NEXT:    korq %k1, %k0, %k0
; AVX512BW-NEXT:    vptestnmb %zmm2, %zmm2, %k1
; AVX512BW-NEXT:    vptestnmb %zmm3, %zmm3, %k2
; AVX512BW-NEXT:    korq %k2, %k1, %k1
; AVX512BW-NEXT:    ktestq %k1, %k0
; AVX512BW-NEXT:    je LBB78_1
; AVX512BW-NEXT:  ## %bb.2: ## %exit
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    retq
; AVX512BW-NEXT:  LBB78_1: ## %bar
; AVX512BW-NEXT:    pushq %rax
; AVX512BW-NEXT:    .cfi_def_cfa_offset 16
; AVX512BW-NEXT:    vzeroupper
; AVX512BW-NEXT:    callq _foo
; AVX512BW-NEXT:    addq $8, %rsp
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: ktest_7:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm0, %ymm4
; AVX512DQ-NEXT:    vpxor %xmm5, %xmm5, %xmm5
; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm0, %ymm0
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm4, %zmm0, %zmm0
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm4, %ymm4
; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm1, %ymm1
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm4, %zmm1, %zmm1
; AVX512DQ-NEXT:    vporq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm1, %ymm1
; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm3, %ymm2
; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm2, %ymm2
; AVX512DQ-NEXT:    vpcmpeqb %ymm5, %ymm3, %ymm3
; AVX512DQ-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512DQ-NEXT:    vpternlogq $200, %zmm1, %zmm0, %zmm2
; AVX512DQ-NEXT:    vextracti64x4 $1, %zmm2, %ymm0
; AVX512DQ-NEXT:    vpor %ymm0, %ymm2, %ymm0
; AVX512DQ-NEXT:    vpmovmskb %ymm0, %eax
; AVX512DQ-NEXT:    testl %eax, %eax
; AVX512DQ-NEXT:    je LBB78_1
; AVX512DQ-NEXT:  ## %bb.2: ## %exit
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
; AVX512DQ-NEXT:  LBB78_1: ## %bar
; AVX512DQ-NEXT:    pushq %rax
; AVX512DQ-NEXT:    .cfi_def_cfa_offset 16
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    callq _foo
; AVX512DQ-NEXT:    addq $8, %rsp
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: ktest_7:
; X86:       ## %bb.0:
; X86-NEXT:    vptestnmb %zmm0, %zmm0, %k0
; X86-NEXT:    vptestnmb %zmm1, %zmm1, %k1
; X86-NEXT:    korq %k1, %k0, %k0
; X86-NEXT:    vptestnmb %zmm2, %zmm2, %k1
; X86-NEXT:    vptestnmb %zmm3, %zmm3, %k2
; X86-NEXT:    korq %k2, %k1, %k1
; X86-NEXT:    kandq %k1, %k0, %k0
; X86-NEXT:    kshiftrq $32, %k0, %k1
; X86-NEXT:    kortestd %k1, %k0
; X86-NEXT:    je LBB78_1
; X86-NEXT:  ## %bb.2: ## %exit
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
; X86-NEXT:  LBB78_1: ## %bar
; X86-NEXT:    subl $12, %esp
; X86-NEXT:    .cfi_def_cfa_offset 16
; X86-NEXT:    vzeroupper
; X86-NEXT:    calll _foo
; X86-NEXT:    addl $12, %esp
; X86-NEXT:    retl
  ; One 64-lane "equals zero" mask per input vector.
  %a = icmp eq <64 x i8> %w, zeroinitializer
  %b = icmp eq <64 x i8> %x, zeroinitializer
  %c = icmp eq <64 x i8> %y, zeroinitializer
  %d = icmp eq <64 x i8> %z, zeroinitializer
  %e = or <64 x i1> %a, %b
  %f = or <64 x i1> %c, %d
  ; The AND of the two OR results feeds directly into the compare with zero.
  %g = and <64 x i1> %e, %f
  %h = bitcast <64 x i1> %g to i64
  ; Take the %bar path (which calls @foo) only when no lane of %g is set.
  %i = icmp eq i64 %h, 0
  br i1 %i, label %bar, label %exit

bar:
  call void @foo()
  br label %exit

exit:
  ret void
}

; mask64_insert: truncates %a to i1 and inserts it into lane 0 of a constant
; <64 x i1> vector in which lanes 1, 17, 33 and 49 are false and every other
; lane is true (each 16-bit group of the constant is 0xFFFD, matching the
; immediates in the expected output). The runs that enable avx512bw expect
; the mask to be assembled in a k-register and expanded into %zmm0 with
; vpmovm2b. The two runs without avx512bw expect the result to be stored
; through the pointer passed in %rdi, with only the low 16 bits computed at
; run time and the remaining bytes written as immediates.
define <64 x i1> @mask64_insert(i32 %a) {
; KNL-LABEL: mask64_insert:
; KNL:       ## %bb.0:
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    andl $1, %esi
; KNL-NEXT:    kmovw %esi, %k0
; KNL-NEXT:    movw $-4, %cx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    kshiftrw $1, %k1, %k1
; KNL-NEXT:    kshiftlw $1, %k1, %k1
; KNL-NEXT:    korw %k0, %k1, %k0
; KNL-NEXT:    kmovw %k0, (%rdi)
; KNL-NEXT:    movw $-3, 6(%rdi)
; KNL-NEXT:    movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
; KNL-NEXT:    retq
;
; SKX-LABEL: mask64_insert:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %edi, %k0
; SKX-NEXT:    kshiftlq $63, %k0, %k0
; SKX-NEXT:    kshiftrq $63, %k0, %k0
; SKX-NEXT:    movabsq $-562958543486980, %rax ## imm = 0xFFFDFFFDFFFDFFFC
; SKX-NEXT:    kmovq %rax, %k1
; SKX-NEXT:    kshiftrq $1, %k1, %k1
; SKX-NEXT:    kshiftlq $1, %k1, %k1
; SKX-NEXT:    korq %k0, %k1, %k0
; SKX-NEXT:    vpmovm2b %k0, %zmm0
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: mask64_insert:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %edi, %k0
; AVX512BW-NEXT:    kshiftlq $63, %k0, %k0
; AVX512BW-NEXT:    kshiftrq $63, %k0, %k0
; AVX512BW-NEXT:    movabsq $-562958543486980, %rax ## imm = 0xFFFDFFFDFFFDFFFC
; AVX512BW-NEXT:    kmovq %rax, %k1
; AVX512BW-NEXT:    kshiftrq $1, %k1, %k1
; AVX512BW-NEXT:    kshiftlq $1, %k1, %k1
; AVX512BW-NEXT:    korq %k0, %k1, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: mask64_insert:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    movq %rdi, %rax
; AVX512DQ-NEXT:    andl $1, %esi
; AVX512DQ-NEXT:    kmovw %esi, %k0
; AVX512DQ-NEXT:    movw $-4, %cx
; AVX512DQ-NEXT:    kmovw %ecx, %k1
; AVX512DQ-NEXT:    kshiftrw $1, %k1, %k1
; AVX512DQ-NEXT:    kshiftlw $1, %k1, %k1
; AVX512DQ-NEXT:    korw %k0, %k1, %k0
; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
; AVX512DQ-NEXT:    movw $-3, 6(%rdi)
; AVX512DQ-NEXT:    movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: mask64_insert:
; X86:       ## %bb.0:
; X86-NEXT:    kmovb {{[0-9]+}}(%esp), %k0
; X86-NEXT:    movl $-131076, %eax ## imm = 0xFFFDFFFC
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    movl $-131075, %eax ## imm = 0xFFFDFFFD
; X86-NEXT:    kmovd %eax, %k2
; X86-NEXT:    kunpckdq %k1, %k2, %k1
; X86-NEXT:    kshiftrq $1, %k1, %k1
; X86-NEXT:    kshiftlq $1, %k1, %k1
; X86-NEXT:    kshiftlq $63, %k0, %k0
; X86-NEXT:    kshiftrq $63, %k0, %k0
; X86-NEXT:    korq %k0, %k1, %k0
; X86-NEXT:    vpmovm2b %k0, %zmm0
; X86-NEXT:    retl
  ; Only bit 0 of %a is kept.
  %a_i = trunc i32 %a to i1
  ; Lane 0 of the constant is overwritten by %a_i; all other lanes are kept.
  %maskv = insertelement <64 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
  ret <64 x i1> %maskv
}

; test_v1i1_add: adds two i1 values through <1 x i1> bitcasts. Every run
; expects the add to be emitted as a scalar xor of the two arguments, with no
; mask-register instructions.
define i1 @test_v1i1_add(i1 %x, i1 %y) {
; CHECK-LABEL: test_v1i1_add:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    xorl %esi, %eax
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test_v1i1_add:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    xorb {{[0-9]+}}(%esp), %al
; X86-NEXT:    retl
  ; Round-trip the scalars through the single-lane vector type so the add
  ; itself is performed on <1 x i1>.
  %m0 = bitcast i1 %x to <1 x i1>
  %m1 = bitcast i1 %y to <1 x i1>
  %m2 = add <1 x i1> %m0,  %m1
  %ret = bitcast <1 x i1> %m2 to i1
  ret i1 %ret
}

; test_v1i1_sub: subtracts two i1 values through <1 x i1> bitcasts. Every run
; expects the sub to be emitted as a scalar xor of the two arguments, the same
; expected output as for the add variant of this test.
define i1 @test_v1i1_sub(i1 %x, i1 %y) {
; CHECK-LABEL: test_v1i1_sub:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    xorl %esi, %eax
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test_v1i1_sub:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    xorb {{[0-9]+}}(%esp), %al
; X86-NEXT:    retl
  ; Round-trip the scalars through the single-lane vector type so the sub
  ; itself is performed on <1 x i1>.
  %m0 = bitcast i1 %x to <1 x i1>
  %m1 = bitcast i1 %y to <1 x i1>
  %m2 = sub <1 x i1> %m0,  %m1
  %ret = bitcast <1 x i1> %m2 to i1
  ret i1 %ret
}

; test_v1i1_mul: multiplies two i1 values through <1 x i1> bitcasts. Every
; run expects the mul to be emitted as a scalar and of the two arguments, with
; no mask-register instructions.
define i1 @test_v1i1_mul(i1 %x, i1 %y) {
; CHECK-LABEL: test_v1i1_mul:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    andl %esi, %eax
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: test_v1i1_mul:
; X86:       ## %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
; X86-NEXT:    retl
  ; Round-trip the scalars through the single-lane vector type so the mul
  ; itself is performed on <1 x i1>.
  %m0 = bitcast i1 %x to <1 x i1>
  %m1 = bitcast i1 %y to <1 x i1>
  %m2 = mul <1 x i1> %m0,  %m1
  %ret = bitcast <1 x i1> %m2 to i1
  ret i1 %ret
}

; uadd_sat_v1i1: calls the unsigned saturating-add intrinsic on <1 x i1>
; operands. Every run expects a scalar or of the two arguments, with no
; mask-register instructions.
define <1 x i1> @uadd_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
; CHECK-LABEL: uadd_sat_v1i1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    orl %esi, %eax
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: uadd_sat_v1i1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    ## kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %z = call <1 x i1> @llvm.uadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
  ret <1 x i1> %z
}
; Declaration of the intrinsic called by uadd_sat_v1i1.
declare <1 x i1> @llvm.uadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)

; usub_sat_v1i1: calls the unsigned saturating-subtract intrinsic on <1 x i1>
; operands. Every run expects both arguments to be moved into k-registers and
; combined with kandnw, with the second argument (%y) as the inverted operand.
; The runs differ only in the kmov width used to move values in and out.
define <1 x i1> @usub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
; KNL-LABEL: usub_sat_v1i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %esi, %k0
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kandnw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: usub_sat_v1i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %esi, %k0
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kandnw %k1, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: usub_sat_v1i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %esi, %k0
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    kandnw %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: usub_sat_v1i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw %esi, %k0
; AVX512DQ-NEXT:    kmovw %edi, %k1
; AVX512DQ-NEXT:    kandnw %k1, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: usub_sat_v1i1:
; X86:       ## %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kandnw %k1, %k0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    ## kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %z = call <1 x i1> @llvm.usub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
  ret <1 x i1> %z
}
; Declaration of the intrinsic called by usub_sat_v1i1.
declare <1 x i1> @llvm.usub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)

; sadd_sat_v1i1: calls the signed saturating-add intrinsic on <1 x i1>
; operands. Every run expects a scalar or of the two arguments, the same
; expected output as for the unsigned saturating-add variant of this test.
define <1 x i1> @sadd_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
; CHECK-LABEL: sadd_sat_v1i1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    orl %esi, %eax
; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
;
; X86-LABEL: sadd_sat_v1i1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    ## kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %z = call <1 x i1> @llvm.sadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
  ret <1 x i1> %z
}
; Declaration of the intrinsic called by sadd_sat_v1i1.
declare <1 x i1> @llvm.sadd.sat.v1i1(<1 x i1> %x, <1 x i1> %y)

; ssub_sat_v1i1: calls the signed saturating-subtract intrinsic on <1 x i1>
; operands. Every run expects both arguments to be moved into k-registers and
; combined with kandnw, the same expected output as for the unsigned
; saturating-subtract variant of this test.
define <1 x i1> @ssub_sat_v1i1(<1 x i1> %x, <1 x i1> %y) nounwind {
; KNL-LABEL: ssub_sat_v1i1:
; KNL:       ## %bb.0:
; KNL-NEXT:    kmovw %esi, %k0
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kandnw %k1, %k0, %k0
; KNL-NEXT:    kmovw %k0, %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: ssub_sat_v1i1:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovd %esi, %k0
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kandnw %k1, %k0, %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
;
; AVX512BW-LABEL: ssub_sat_v1i1:
; AVX512BW:       ## %bb.0:
; AVX512BW-NEXT:    kmovd %esi, %k0
; AVX512BW-NEXT:    kmovd %edi, %k1
; AVX512BW-NEXT:    kandnw %k1, %k0, %k0
; AVX512BW-NEXT:    kmovd %k0, %eax
; AVX512BW-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: ssub_sat_v1i1:
; AVX512DQ:       ## %bb.0:
; AVX512DQ-NEXT:    kmovw %esi, %k0
; AVX512DQ-NEXT:    kmovw %edi, %k1
; AVX512DQ-NEXT:    kandnw %k1, %k0, %k0
; AVX512DQ-NEXT:    kmovw %k0, %eax
; AVX512DQ-NEXT:    ## kill: def $al killed $al killed $eax
; AVX512DQ-NEXT:    retq
;
; X86-LABEL: ssub_sat_v1i1:
; X86:       ## %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kandnw %k1, %k0, %k0
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    ## kill: def $al killed $al killed $eax
; X86-NEXT:    retl
  %z = call <1 x i1> @llvm.ssub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)
  ret <1 x i1> %z
}
; Declaration of the intrinsic called by ssub_sat_v1i1.
declare <1 x i1> @llvm.ssub.sat.v1i1(<1 x i1> %x, <1 x i1> %y)

; Module flag attaching a ProfileSummary (InstrProf format) to this module.
; !1 lists the summary fields; !9 and !10 hold the DetailedSummary entries.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
; A function_entry_count of 0. Nothing in the visible part of the file refers
; to !14 - presumably a function elsewhere attaches it via !prof (TODO confirm).
!14 = !{!"function_entry_count", i64 0}
